Merge branch 'for-linus2' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris...
[platform/adaptation/renesas_rcar/renesas_kernel.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
52 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
53 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
58 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
59 MODULE_FIRMWARE("radeon/KABINI_me.bin");
60 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
61 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
62 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
63 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
64
65 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
66 extern void r600_ih_ring_fini(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
70 extern void sumo_rlc_fini(struct radeon_device *rdev);
71 extern int sumo_rlc_init(struct radeon_device *rdev);
72 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
73 extern void si_rlc_reset(struct radeon_device *rdev);
74 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
75 extern int cik_sdma_resume(struct radeon_device *rdev);
76 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
77 extern void cik_sdma_fini(struct radeon_device *rdev);
78 static void cik_rlc_stop(struct radeon_device *rdev);
79 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
80 static void cik_program_aspm(struct radeon_device *rdev);
81 static void cik_init_pg(struct radeon_device *rdev);
82 static void cik_init_cg(struct radeon_device *rdev);
83 static void cik_fini_pg(struct radeon_device *rdev);
84 static void cik_fini_cg(struct radeon_device *rdev);
85 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
86                                           bool enable);
87
88 /* get temperature in millidegrees */
89 int ci_get_temp(struct radeon_device *rdev)
90 {
91         u32 temp;
92         int actual_temp = 0;
93
94         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
95                 CTF_TEMP_SHIFT;
96
97         if (temp & 0x200)
98                 actual_temp = 255;
99         else
100                 actual_temp = temp & 0x1ff;
101
102         actual_temp = actual_temp * 1000;
103
104         return actual_temp;
105 }
106
107 /* get temperature in millidegrees */
108 int kv_get_temp(struct radeon_device *rdev)
109 {
110         u32 temp;
111         int actual_temp = 0;
112
113         temp = RREG32_SMC(0xC0300E0C);
114
115         if (temp)
116                 actual_temp = (temp / 8) - 49;
117         else
118                 actual_temp = 0;
119
120         actual_temp = actual_temp * 1000;
121
122         return actual_temp;
123 }
124
125 /*
126  * Indirect registers accessor
127  */
128 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
129 {
130         unsigned long flags;
131         u32 r;
132
133         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
134         WREG32(PCIE_INDEX, reg);
135         (void)RREG32(PCIE_INDEX);
136         r = RREG32(PCIE_DATA);
137         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
138         return r;
139 }
140
141 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
142 {
143         unsigned long flags;
144
145         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
146         WREG32(PCIE_INDEX, reg);
147         (void)RREG32(PCIE_INDEX);
148         WREG32(PCIE_DATA, v);
149         (void)RREG32(PCIE_DATA);
150         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
151 }
152
153 static const u32 spectre_rlc_save_restore_register_list[] =
154 {
155         (0x0e00 << 16) | (0xc12c >> 2),
156         0x00000000,
157         (0x0e00 << 16) | (0xc140 >> 2),
158         0x00000000,
159         (0x0e00 << 16) | (0xc150 >> 2),
160         0x00000000,
161         (0x0e00 << 16) | (0xc15c >> 2),
162         0x00000000,
163         (0x0e00 << 16) | (0xc168 >> 2),
164         0x00000000,
165         (0x0e00 << 16) | (0xc170 >> 2),
166         0x00000000,
167         (0x0e00 << 16) | (0xc178 >> 2),
168         0x00000000,
169         (0x0e00 << 16) | (0xc204 >> 2),
170         0x00000000,
171         (0x0e00 << 16) | (0xc2b4 >> 2),
172         0x00000000,
173         (0x0e00 << 16) | (0xc2b8 >> 2),
174         0x00000000,
175         (0x0e00 << 16) | (0xc2bc >> 2),
176         0x00000000,
177         (0x0e00 << 16) | (0xc2c0 >> 2),
178         0x00000000,
179         (0x0e00 << 16) | (0x8228 >> 2),
180         0x00000000,
181         (0x0e00 << 16) | (0x829c >> 2),
182         0x00000000,
183         (0x0e00 << 16) | (0x869c >> 2),
184         0x00000000,
185         (0x0600 << 16) | (0x98f4 >> 2),
186         0x00000000,
187         (0x0e00 << 16) | (0x98f8 >> 2),
188         0x00000000,
189         (0x0e00 << 16) | (0x9900 >> 2),
190         0x00000000,
191         (0x0e00 << 16) | (0xc260 >> 2),
192         0x00000000,
193         (0x0e00 << 16) | (0x90e8 >> 2),
194         0x00000000,
195         (0x0e00 << 16) | (0x3c000 >> 2),
196         0x00000000,
197         (0x0e00 << 16) | (0x3c00c >> 2),
198         0x00000000,
199         (0x0e00 << 16) | (0x8c1c >> 2),
200         0x00000000,
201         (0x0e00 << 16) | (0x9700 >> 2),
202         0x00000000,
203         (0x0e00 << 16) | (0xcd20 >> 2),
204         0x00000000,
205         (0x4e00 << 16) | (0xcd20 >> 2),
206         0x00000000,
207         (0x5e00 << 16) | (0xcd20 >> 2),
208         0x00000000,
209         (0x6e00 << 16) | (0xcd20 >> 2),
210         0x00000000,
211         (0x7e00 << 16) | (0xcd20 >> 2),
212         0x00000000,
213         (0x8e00 << 16) | (0xcd20 >> 2),
214         0x00000000,
215         (0x9e00 << 16) | (0xcd20 >> 2),
216         0x00000000,
217         (0xae00 << 16) | (0xcd20 >> 2),
218         0x00000000,
219         (0xbe00 << 16) | (0xcd20 >> 2),
220         0x00000000,
221         (0x0e00 << 16) | (0x89bc >> 2),
222         0x00000000,
223         (0x0e00 << 16) | (0x8900 >> 2),
224         0x00000000,
225         0x3,
226         (0x0e00 << 16) | (0xc130 >> 2),
227         0x00000000,
228         (0x0e00 << 16) | (0xc134 >> 2),
229         0x00000000,
230         (0x0e00 << 16) | (0xc1fc >> 2),
231         0x00000000,
232         (0x0e00 << 16) | (0xc208 >> 2),
233         0x00000000,
234         (0x0e00 << 16) | (0xc264 >> 2),
235         0x00000000,
236         (0x0e00 << 16) | (0xc268 >> 2),
237         0x00000000,
238         (0x0e00 << 16) | (0xc26c >> 2),
239         0x00000000,
240         (0x0e00 << 16) | (0xc270 >> 2),
241         0x00000000,
242         (0x0e00 << 16) | (0xc274 >> 2),
243         0x00000000,
244         (0x0e00 << 16) | (0xc278 >> 2),
245         0x00000000,
246         (0x0e00 << 16) | (0xc27c >> 2),
247         0x00000000,
248         (0x0e00 << 16) | (0xc280 >> 2),
249         0x00000000,
250         (0x0e00 << 16) | (0xc284 >> 2),
251         0x00000000,
252         (0x0e00 << 16) | (0xc288 >> 2),
253         0x00000000,
254         (0x0e00 << 16) | (0xc28c >> 2),
255         0x00000000,
256         (0x0e00 << 16) | (0xc290 >> 2),
257         0x00000000,
258         (0x0e00 << 16) | (0xc294 >> 2),
259         0x00000000,
260         (0x0e00 << 16) | (0xc298 >> 2),
261         0x00000000,
262         (0x0e00 << 16) | (0xc29c >> 2),
263         0x00000000,
264         (0x0e00 << 16) | (0xc2a0 >> 2),
265         0x00000000,
266         (0x0e00 << 16) | (0xc2a4 >> 2),
267         0x00000000,
268         (0x0e00 << 16) | (0xc2a8 >> 2),
269         0x00000000,
270         (0x0e00 << 16) | (0xc2ac  >> 2),
271         0x00000000,
272         (0x0e00 << 16) | (0xc2b0 >> 2),
273         0x00000000,
274         (0x0e00 << 16) | (0x301d0 >> 2),
275         0x00000000,
276         (0x0e00 << 16) | (0x30238 >> 2),
277         0x00000000,
278         (0x0e00 << 16) | (0x30250 >> 2),
279         0x00000000,
280         (0x0e00 << 16) | (0x30254 >> 2),
281         0x00000000,
282         (0x0e00 << 16) | (0x30258 >> 2),
283         0x00000000,
284         (0x0e00 << 16) | (0x3025c >> 2),
285         0x00000000,
286         (0x4e00 << 16) | (0xc900 >> 2),
287         0x00000000,
288         (0x5e00 << 16) | (0xc900 >> 2),
289         0x00000000,
290         (0x6e00 << 16) | (0xc900 >> 2),
291         0x00000000,
292         (0x7e00 << 16) | (0xc900 >> 2),
293         0x00000000,
294         (0x8e00 << 16) | (0xc900 >> 2),
295         0x00000000,
296         (0x9e00 << 16) | (0xc900 >> 2),
297         0x00000000,
298         (0xae00 << 16) | (0xc900 >> 2),
299         0x00000000,
300         (0xbe00 << 16) | (0xc900 >> 2),
301         0x00000000,
302         (0x4e00 << 16) | (0xc904 >> 2),
303         0x00000000,
304         (0x5e00 << 16) | (0xc904 >> 2),
305         0x00000000,
306         (0x6e00 << 16) | (0xc904 >> 2),
307         0x00000000,
308         (0x7e00 << 16) | (0xc904 >> 2),
309         0x00000000,
310         (0x8e00 << 16) | (0xc904 >> 2),
311         0x00000000,
312         (0x9e00 << 16) | (0xc904 >> 2),
313         0x00000000,
314         (0xae00 << 16) | (0xc904 >> 2),
315         0x00000000,
316         (0xbe00 << 16) | (0xc904 >> 2),
317         0x00000000,
318         (0x4e00 << 16) | (0xc908 >> 2),
319         0x00000000,
320         (0x5e00 << 16) | (0xc908 >> 2),
321         0x00000000,
322         (0x6e00 << 16) | (0xc908 >> 2),
323         0x00000000,
324         (0x7e00 << 16) | (0xc908 >> 2),
325         0x00000000,
326         (0x8e00 << 16) | (0xc908 >> 2),
327         0x00000000,
328         (0x9e00 << 16) | (0xc908 >> 2),
329         0x00000000,
330         (0xae00 << 16) | (0xc908 >> 2),
331         0x00000000,
332         (0xbe00 << 16) | (0xc908 >> 2),
333         0x00000000,
334         (0x4e00 << 16) | (0xc90c >> 2),
335         0x00000000,
336         (0x5e00 << 16) | (0xc90c >> 2),
337         0x00000000,
338         (0x6e00 << 16) | (0xc90c >> 2),
339         0x00000000,
340         (0x7e00 << 16) | (0xc90c >> 2),
341         0x00000000,
342         (0x8e00 << 16) | (0xc90c >> 2),
343         0x00000000,
344         (0x9e00 << 16) | (0xc90c >> 2),
345         0x00000000,
346         (0xae00 << 16) | (0xc90c >> 2),
347         0x00000000,
348         (0xbe00 << 16) | (0xc90c >> 2),
349         0x00000000,
350         (0x4e00 << 16) | (0xc910 >> 2),
351         0x00000000,
352         (0x5e00 << 16) | (0xc910 >> 2),
353         0x00000000,
354         (0x6e00 << 16) | (0xc910 >> 2),
355         0x00000000,
356         (0x7e00 << 16) | (0xc910 >> 2),
357         0x00000000,
358         (0x8e00 << 16) | (0xc910 >> 2),
359         0x00000000,
360         (0x9e00 << 16) | (0xc910 >> 2),
361         0x00000000,
362         (0xae00 << 16) | (0xc910 >> 2),
363         0x00000000,
364         (0xbe00 << 16) | (0xc910 >> 2),
365         0x00000000,
366         (0x0e00 << 16) | (0xc99c >> 2),
367         0x00000000,
368         (0x0e00 << 16) | (0x9834 >> 2),
369         0x00000000,
370         (0x0000 << 16) | (0x30f00 >> 2),
371         0x00000000,
372         (0x0001 << 16) | (0x30f00 >> 2),
373         0x00000000,
374         (0x0000 << 16) | (0x30f04 >> 2),
375         0x00000000,
376         (0x0001 << 16) | (0x30f04 >> 2),
377         0x00000000,
378         (0x0000 << 16) | (0x30f08 >> 2),
379         0x00000000,
380         (0x0001 << 16) | (0x30f08 >> 2),
381         0x00000000,
382         (0x0000 << 16) | (0x30f0c >> 2),
383         0x00000000,
384         (0x0001 << 16) | (0x30f0c >> 2),
385         0x00000000,
386         (0x0600 << 16) | (0x9b7c >> 2),
387         0x00000000,
388         (0x0e00 << 16) | (0x8a14 >> 2),
389         0x00000000,
390         (0x0e00 << 16) | (0x8a18 >> 2),
391         0x00000000,
392         (0x0600 << 16) | (0x30a00 >> 2),
393         0x00000000,
394         (0x0e00 << 16) | (0x8bf0 >> 2),
395         0x00000000,
396         (0x0e00 << 16) | (0x8bcc >> 2),
397         0x00000000,
398         (0x0e00 << 16) | (0x8b24 >> 2),
399         0x00000000,
400         (0x0e00 << 16) | (0x30a04 >> 2),
401         0x00000000,
402         (0x0600 << 16) | (0x30a10 >> 2),
403         0x00000000,
404         (0x0600 << 16) | (0x30a14 >> 2),
405         0x00000000,
406         (0x0600 << 16) | (0x30a18 >> 2),
407         0x00000000,
408         (0x0600 << 16) | (0x30a2c >> 2),
409         0x00000000,
410         (0x0e00 << 16) | (0xc700 >> 2),
411         0x00000000,
412         (0x0e00 << 16) | (0xc704 >> 2),
413         0x00000000,
414         (0x0e00 << 16) | (0xc708 >> 2),
415         0x00000000,
416         (0x0e00 << 16) | (0xc768 >> 2),
417         0x00000000,
418         (0x0400 << 16) | (0xc770 >> 2),
419         0x00000000,
420         (0x0400 << 16) | (0xc774 >> 2),
421         0x00000000,
422         (0x0400 << 16) | (0xc778 >> 2),
423         0x00000000,
424         (0x0400 << 16) | (0xc77c >> 2),
425         0x00000000,
426         (0x0400 << 16) | (0xc780 >> 2),
427         0x00000000,
428         (0x0400 << 16) | (0xc784 >> 2),
429         0x00000000,
430         (0x0400 << 16) | (0xc788 >> 2),
431         0x00000000,
432         (0x0400 << 16) | (0xc78c >> 2),
433         0x00000000,
434         (0x0400 << 16) | (0xc798 >> 2),
435         0x00000000,
436         (0x0400 << 16) | (0xc79c >> 2),
437         0x00000000,
438         (0x0400 << 16) | (0xc7a0 >> 2),
439         0x00000000,
440         (0x0400 << 16) | (0xc7a4 >> 2),
441         0x00000000,
442         (0x0400 << 16) | (0xc7a8 >> 2),
443         0x00000000,
444         (0x0400 << 16) | (0xc7ac >> 2),
445         0x00000000,
446         (0x0400 << 16) | (0xc7b0 >> 2),
447         0x00000000,
448         (0x0400 << 16) | (0xc7b4 >> 2),
449         0x00000000,
450         (0x0e00 << 16) | (0x9100 >> 2),
451         0x00000000,
452         (0x0e00 << 16) | (0x3c010 >> 2),
453         0x00000000,
454         (0x0e00 << 16) | (0x92a8 >> 2),
455         0x00000000,
456         (0x0e00 << 16) | (0x92ac >> 2),
457         0x00000000,
458         (0x0e00 << 16) | (0x92b4 >> 2),
459         0x00000000,
460         (0x0e00 << 16) | (0x92b8 >> 2),
461         0x00000000,
462         (0x0e00 << 16) | (0x92bc >> 2),
463         0x00000000,
464         (0x0e00 << 16) | (0x92c0 >> 2),
465         0x00000000,
466         (0x0e00 << 16) | (0x92c4 >> 2),
467         0x00000000,
468         (0x0e00 << 16) | (0x92c8 >> 2),
469         0x00000000,
470         (0x0e00 << 16) | (0x92cc >> 2),
471         0x00000000,
472         (0x0e00 << 16) | (0x92d0 >> 2),
473         0x00000000,
474         (0x0e00 << 16) | (0x8c00 >> 2),
475         0x00000000,
476         (0x0e00 << 16) | (0x8c04 >> 2),
477         0x00000000,
478         (0x0e00 << 16) | (0x8c20 >> 2),
479         0x00000000,
480         (0x0e00 << 16) | (0x8c38 >> 2),
481         0x00000000,
482         (0x0e00 << 16) | (0x8c3c >> 2),
483         0x00000000,
484         (0x0e00 << 16) | (0xae00 >> 2),
485         0x00000000,
486         (0x0e00 << 16) | (0x9604 >> 2),
487         0x00000000,
488         (0x0e00 << 16) | (0xac08 >> 2),
489         0x00000000,
490         (0x0e00 << 16) | (0xac0c >> 2),
491         0x00000000,
492         (0x0e00 << 16) | (0xac10 >> 2),
493         0x00000000,
494         (0x0e00 << 16) | (0xac14 >> 2),
495         0x00000000,
496         (0x0e00 << 16) | (0xac58 >> 2),
497         0x00000000,
498         (0x0e00 << 16) | (0xac68 >> 2),
499         0x00000000,
500         (0x0e00 << 16) | (0xac6c >> 2),
501         0x00000000,
502         (0x0e00 << 16) | (0xac70 >> 2),
503         0x00000000,
504         (0x0e00 << 16) | (0xac74 >> 2),
505         0x00000000,
506         (0x0e00 << 16) | (0xac78 >> 2),
507         0x00000000,
508         (0x0e00 << 16) | (0xac7c >> 2),
509         0x00000000,
510         (0x0e00 << 16) | (0xac80 >> 2),
511         0x00000000,
512         (0x0e00 << 16) | (0xac84 >> 2),
513         0x00000000,
514         (0x0e00 << 16) | (0xac88 >> 2),
515         0x00000000,
516         (0x0e00 << 16) | (0xac8c >> 2),
517         0x00000000,
518         (0x0e00 << 16) | (0x970c >> 2),
519         0x00000000,
520         (0x0e00 << 16) | (0x9714 >> 2),
521         0x00000000,
522         (0x0e00 << 16) | (0x9718 >> 2),
523         0x00000000,
524         (0x0e00 << 16) | (0x971c >> 2),
525         0x00000000,
526         (0x0e00 << 16) | (0x31068 >> 2),
527         0x00000000,
528         (0x4e00 << 16) | (0x31068 >> 2),
529         0x00000000,
530         (0x5e00 << 16) | (0x31068 >> 2),
531         0x00000000,
532         (0x6e00 << 16) | (0x31068 >> 2),
533         0x00000000,
534         (0x7e00 << 16) | (0x31068 >> 2),
535         0x00000000,
536         (0x8e00 << 16) | (0x31068 >> 2),
537         0x00000000,
538         (0x9e00 << 16) | (0x31068 >> 2),
539         0x00000000,
540         (0xae00 << 16) | (0x31068 >> 2),
541         0x00000000,
542         (0xbe00 << 16) | (0x31068 >> 2),
543         0x00000000,
544         (0x0e00 << 16) | (0xcd10 >> 2),
545         0x00000000,
546         (0x0e00 << 16) | (0xcd14 >> 2),
547         0x00000000,
548         (0x0e00 << 16) | (0x88b0 >> 2),
549         0x00000000,
550         (0x0e00 << 16) | (0x88b4 >> 2),
551         0x00000000,
552         (0x0e00 << 16) | (0x88b8 >> 2),
553         0x00000000,
554         (0x0e00 << 16) | (0x88bc >> 2),
555         0x00000000,
556         (0x0400 << 16) | (0x89c0 >> 2),
557         0x00000000,
558         (0x0e00 << 16) | (0x88c4 >> 2),
559         0x00000000,
560         (0x0e00 << 16) | (0x88c8 >> 2),
561         0x00000000,
562         (0x0e00 << 16) | (0x88d0 >> 2),
563         0x00000000,
564         (0x0e00 << 16) | (0x88d4 >> 2),
565         0x00000000,
566         (0x0e00 << 16) | (0x88d8 >> 2),
567         0x00000000,
568         (0x0e00 << 16) | (0x8980 >> 2),
569         0x00000000,
570         (0x0e00 << 16) | (0x30938 >> 2),
571         0x00000000,
572         (0x0e00 << 16) | (0x3093c >> 2),
573         0x00000000,
574         (0x0e00 << 16) | (0x30940 >> 2),
575         0x00000000,
576         (0x0e00 << 16) | (0x89a0 >> 2),
577         0x00000000,
578         (0x0e00 << 16) | (0x30900 >> 2),
579         0x00000000,
580         (0x0e00 << 16) | (0x30904 >> 2),
581         0x00000000,
582         (0x0e00 << 16) | (0x89b4 >> 2),
583         0x00000000,
584         (0x0e00 << 16) | (0x3c210 >> 2),
585         0x00000000,
586         (0x0e00 << 16) | (0x3c214 >> 2),
587         0x00000000,
588         (0x0e00 << 16) | (0x3c218 >> 2),
589         0x00000000,
590         (0x0e00 << 16) | (0x8904 >> 2),
591         0x00000000,
592         0x5,
593         (0x0e00 << 16) | (0x8c28 >> 2),
594         (0x0e00 << 16) | (0x8c2c >> 2),
595         (0x0e00 << 16) | (0x8c30 >> 2),
596         (0x0e00 << 16) | (0x8c34 >> 2),
597         (0x0e00 << 16) | (0x9600 >> 2),
598 };
599
600 static const u32 kalindi_rlc_save_restore_register_list[] =
601 {
602         (0x0e00 << 16) | (0xc12c >> 2),
603         0x00000000,
604         (0x0e00 << 16) | (0xc140 >> 2),
605         0x00000000,
606         (0x0e00 << 16) | (0xc150 >> 2),
607         0x00000000,
608         (0x0e00 << 16) | (0xc15c >> 2),
609         0x00000000,
610         (0x0e00 << 16) | (0xc168 >> 2),
611         0x00000000,
612         (0x0e00 << 16) | (0xc170 >> 2),
613         0x00000000,
614         (0x0e00 << 16) | (0xc204 >> 2),
615         0x00000000,
616         (0x0e00 << 16) | (0xc2b4 >> 2),
617         0x00000000,
618         (0x0e00 << 16) | (0xc2b8 >> 2),
619         0x00000000,
620         (0x0e00 << 16) | (0xc2bc >> 2),
621         0x00000000,
622         (0x0e00 << 16) | (0xc2c0 >> 2),
623         0x00000000,
624         (0x0e00 << 16) | (0x8228 >> 2),
625         0x00000000,
626         (0x0e00 << 16) | (0x829c >> 2),
627         0x00000000,
628         (0x0e00 << 16) | (0x869c >> 2),
629         0x00000000,
630         (0x0600 << 16) | (0x98f4 >> 2),
631         0x00000000,
632         (0x0e00 << 16) | (0x98f8 >> 2),
633         0x00000000,
634         (0x0e00 << 16) | (0x9900 >> 2),
635         0x00000000,
636         (0x0e00 << 16) | (0xc260 >> 2),
637         0x00000000,
638         (0x0e00 << 16) | (0x90e8 >> 2),
639         0x00000000,
640         (0x0e00 << 16) | (0x3c000 >> 2),
641         0x00000000,
642         (0x0e00 << 16) | (0x3c00c >> 2),
643         0x00000000,
644         (0x0e00 << 16) | (0x8c1c >> 2),
645         0x00000000,
646         (0x0e00 << 16) | (0x9700 >> 2),
647         0x00000000,
648         (0x0e00 << 16) | (0xcd20 >> 2),
649         0x00000000,
650         (0x4e00 << 16) | (0xcd20 >> 2),
651         0x00000000,
652         (0x5e00 << 16) | (0xcd20 >> 2),
653         0x00000000,
654         (0x6e00 << 16) | (0xcd20 >> 2),
655         0x00000000,
656         (0x7e00 << 16) | (0xcd20 >> 2),
657         0x00000000,
658         (0x0e00 << 16) | (0x89bc >> 2),
659         0x00000000,
660         (0x0e00 << 16) | (0x8900 >> 2),
661         0x00000000,
662         0x3,
663         (0x0e00 << 16) | (0xc130 >> 2),
664         0x00000000,
665         (0x0e00 << 16) | (0xc134 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0xc1fc >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0xc208 >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0xc264 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0xc268 >> 2),
674         0x00000000,
675         (0x0e00 << 16) | (0xc26c >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0xc270 >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0xc274 >> 2),
680         0x00000000,
681         (0x0e00 << 16) | (0xc28c >> 2),
682         0x00000000,
683         (0x0e00 << 16) | (0xc290 >> 2),
684         0x00000000,
685         (0x0e00 << 16) | (0xc294 >> 2),
686         0x00000000,
687         (0x0e00 << 16) | (0xc298 >> 2),
688         0x00000000,
689         (0x0e00 << 16) | (0xc2a0 >> 2),
690         0x00000000,
691         (0x0e00 << 16) | (0xc2a4 >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0xc2a8 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0xc2ac >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0x301d0 >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0x30238 >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0x30250 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0x30254 >> 2),
704         0x00000000,
705         (0x0e00 << 16) | (0x30258 >> 2),
706         0x00000000,
707         (0x0e00 << 16) | (0x3025c >> 2),
708         0x00000000,
709         (0x4e00 << 16) | (0xc900 >> 2),
710         0x00000000,
711         (0x5e00 << 16) | (0xc900 >> 2),
712         0x00000000,
713         (0x6e00 << 16) | (0xc900 >> 2),
714         0x00000000,
715         (0x7e00 << 16) | (0xc900 >> 2),
716         0x00000000,
717         (0x4e00 << 16) | (0xc904 >> 2),
718         0x00000000,
719         (0x5e00 << 16) | (0xc904 >> 2),
720         0x00000000,
721         (0x6e00 << 16) | (0xc904 >> 2),
722         0x00000000,
723         (0x7e00 << 16) | (0xc904 >> 2),
724         0x00000000,
725         (0x4e00 << 16) | (0xc908 >> 2),
726         0x00000000,
727         (0x5e00 << 16) | (0xc908 >> 2),
728         0x00000000,
729         (0x6e00 << 16) | (0xc908 >> 2),
730         0x00000000,
731         (0x7e00 << 16) | (0xc908 >> 2),
732         0x00000000,
733         (0x4e00 << 16) | (0xc90c >> 2),
734         0x00000000,
735         (0x5e00 << 16) | (0xc90c >> 2),
736         0x00000000,
737         (0x6e00 << 16) | (0xc90c >> 2),
738         0x00000000,
739         (0x7e00 << 16) | (0xc90c >> 2),
740         0x00000000,
741         (0x4e00 << 16) | (0xc910 >> 2),
742         0x00000000,
743         (0x5e00 << 16) | (0xc910 >> 2),
744         0x00000000,
745         (0x6e00 << 16) | (0xc910 >> 2),
746         0x00000000,
747         (0x7e00 << 16) | (0xc910 >> 2),
748         0x00000000,
749         (0x0e00 << 16) | (0xc99c >> 2),
750         0x00000000,
751         (0x0e00 << 16) | (0x9834 >> 2),
752         0x00000000,
753         (0x0000 << 16) | (0x30f00 >> 2),
754         0x00000000,
755         (0x0000 << 16) | (0x30f04 >> 2),
756         0x00000000,
757         (0x0000 << 16) | (0x30f08 >> 2),
758         0x00000000,
759         (0x0000 << 16) | (0x30f0c >> 2),
760         0x00000000,
761         (0x0600 << 16) | (0x9b7c >> 2),
762         0x00000000,
763         (0x0e00 << 16) | (0x8a14 >> 2),
764         0x00000000,
765         (0x0e00 << 16) | (0x8a18 >> 2),
766         0x00000000,
767         (0x0600 << 16) | (0x30a00 >> 2),
768         0x00000000,
769         (0x0e00 << 16) | (0x8bf0 >> 2),
770         0x00000000,
771         (0x0e00 << 16) | (0x8bcc >> 2),
772         0x00000000,
773         (0x0e00 << 16) | (0x8b24 >> 2),
774         0x00000000,
775         (0x0e00 << 16) | (0x30a04 >> 2),
776         0x00000000,
777         (0x0600 << 16) | (0x30a10 >> 2),
778         0x00000000,
779         (0x0600 << 16) | (0x30a14 >> 2),
780         0x00000000,
781         (0x0600 << 16) | (0x30a18 >> 2),
782         0x00000000,
783         (0x0600 << 16) | (0x30a2c >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc700 >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc704 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc708 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc768 >> 2),
792         0x00000000,
793         (0x0400 << 16) | (0xc770 >> 2),
794         0x00000000,
795         (0x0400 << 16) | (0xc774 >> 2),
796         0x00000000,
797         (0x0400 << 16) | (0xc798 >> 2),
798         0x00000000,
799         (0x0400 << 16) | (0xc79c >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0x9100 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0x3c010 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0x8c00 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0x8c04 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0x8c20 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0x8c38 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x8c3c >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0xae00 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x9604 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0xac08 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0xac0c >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0xac10 >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0xac14 >> 2),
826         0x00000000,
827         (0x0e00 << 16) | (0xac58 >> 2),
828         0x00000000,
829         (0x0e00 << 16) | (0xac68 >> 2),
830         0x00000000,
831         (0x0e00 << 16) | (0xac6c >> 2),
832         0x00000000,
833         (0x0e00 << 16) | (0xac70 >> 2),
834         0x00000000,
835         (0x0e00 << 16) | (0xac74 >> 2),
836         0x00000000,
837         (0x0e00 << 16) | (0xac78 >> 2),
838         0x00000000,
839         (0x0e00 << 16) | (0xac7c >> 2),
840         0x00000000,
841         (0x0e00 << 16) | (0xac80 >> 2),
842         0x00000000,
843         (0x0e00 << 16) | (0xac84 >> 2),
844         0x00000000,
845         (0x0e00 << 16) | (0xac88 >> 2),
846         0x00000000,
847         (0x0e00 << 16) | (0xac8c >> 2),
848         0x00000000,
849         (0x0e00 << 16) | (0x970c >> 2),
850         0x00000000,
851         (0x0e00 << 16) | (0x9714 >> 2),
852         0x00000000,
853         (0x0e00 << 16) | (0x9718 >> 2),
854         0x00000000,
855         (0x0e00 << 16) | (0x971c >> 2),
856         0x00000000,
857         (0x0e00 << 16) | (0x31068 >> 2),
858         0x00000000,
859         (0x4e00 << 16) | (0x31068 >> 2),
860         0x00000000,
861         (0x5e00 << 16) | (0x31068 >> 2),
862         0x00000000,
863         (0x6e00 << 16) | (0x31068 >> 2),
864         0x00000000,
865         (0x7e00 << 16) | (0x31068 >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0xcd10 >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0xcd14 >> 2),
870         0x00000000,
871         (0x0e00 << 16) | (0x88b0 >> 2),
872         0x00000000,
873         (0x0e00 << 16) | (0x88b4 >> 2),
874         0x00000000,
875         (0x0e00 << 16) | (0x88b8 >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x88bc >> 2),
878         0x00000000,
879         (0x0400 << 16) | (0x89c0 >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x88c4 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x88c8 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x88d0 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x88d4 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x88d8 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x8980 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0x30938 >> 2),
894         0x00000000,
895         (0x0e00 << 16) | (0x3093c >> 2),
896         0x00000000,
897         (0x0e00 << 16) | (0x30940 >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0x89a0 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0x30900 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0x30904 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0x89b4 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0x3e1fc >> 2),
908         0x00000000,
909         (0x0e00 << 16) | (0x3c210 >> 2),
910         0x00000000,
911         (0x0e00 << 16) | (0x3c214 >> 2),
912         0x00000000,
913         (0x0e00 << 16) | (0x3c218 >> 2),
914         0x00000000,
915         (0x0e00 << 16) | (0x8904 >> 2),
916         0x00000000,
917         0x5,
918         (0x0e00 << 16) | (0x8c28 >> 2),
919         (0x0e00 << 16) | (0x8c2c >> 2),
920         (0x0e00 << 16) | (0x8c30 >> 2),
921         (0x0e00 << 16) | (0x8c34 >> 2),
922         (0x0e00 << 16) | (0x9600 >> 2),
923 };
924
/* Bonaire SPM golden settings: {register offset, mask, value} triples
 * applied via radeon_program_register_sequence(). */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
929
/* Bonaire common golden settings: {register offset, mask, value} triples. */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
937
/* Bonaire golden register settings: {register offset, mask, value} triples
 * supplied by AMD and programmed once at init by
 * radeon_program_register_sequence(). */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
982
/* Bonaire medium/coarse grain clock gating (MGCG/CGCG) init sequence:
 * {register offset, mask, value} triples applied by
 * radeon_program_register_sequence() before the golden registers. */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1068
/* Spectre (Kaveri) SPM golden settings: {register offset, mask, value}. */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1073
/* Spectre (Kaveri) common golden settings: {register offset, mask, value}. */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1081
1082 static const u32 spectre_golden_registers[] =
1083 {
1084         0x3c000, 0xffff1fff, 0x96940200,
1085         0x3c00c, 0xffff0001, 0xff000000,
1086         0x3c200, 0xfffc0fff, 0x00000100,
1087         0x6ed8, 0x00010101, 0x00010000,
1088         0x9834, 0xf00fffff, 0x00000400,
1089         0x9838, 0xfffffffc, 0x00020200,
1090         0x5bb0, 0x000000f0, 0x00000070,
1091         0x5bc0, 0xf0311fff, 0x80300000,
1092         0x98f8, 0x73773777, 0x12010001,
1093         0x9b7c, 0x00ff0000, 0x00fc0000,
1094         0x2f48, 0x73773777, 0x12010001,
1095         0x8a14, 0xf000003f, 0x00000007,
1096         0x8b24, 0xffffffff, 0x00ffffff,
1097         0x28350, 0x3f3f3fff, 0x00000082,
1098         0x28355, 0x0000003f, 0x00000000,
1099         0x3e78, 0x00000001, 0x00000002,
1100         0x913c, 0xffff03df, 0x00000004,
1101         0xc768, 0x00000008, 0x00000008,
1102         0x8c00, 0x000008ff, 0x00000800,
1103         0x9508, 0x00010000, 0x00010000,
1104         0xac0c, 0xffffffff, 0x54763210,
1105         0x214f8, 0x01ff01ff, 0x00000002,
1106         0x21498, 0x007ff800, 0x00200000,
1107         0x2015c, 0xffffffff, 0x00000f40,
1108         0x30934, 0xffffffff, 0x00000001
1109 };
1110
/* Spectre (Kaveri) MGCG/CGCG clock gating init sequence:
 * {register offset, mask, value} triples. */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1201
/* Kalindi (Kabini) SPM golden settings: {register offset, mask, value}. */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1206
/* Kalindi (Kabini) common golden settings: {register offset, mask, value}. */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1214
/* Kalindi (Kabini) golden register settings:
 * {register offset, mask, value} triples. */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1248
/* Kalindi (Kabini) MGCG/CGCG clock gating init sequence:
 * {register offset, mask, value} triples. */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1307
/* Hawaii SPM golden settings: {register offset, mask, value}. */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1312
/* Hawaii common golden settings: {register offset, mask, value}. */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1321
/* Hawaii golden register settings: {register offset, mask, value} triples. */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1361
/* Hawaii MGCG/CGCG clock gating init sequence:
 * {register offset, mask, value} triples. */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1472
1473 static void cik_init_golden_registers(struct radeon_device *rdev)
1474 {
1475         switch (rdev->family) {
1476         case CHIP_BONAIRE:
1477                 radeon_program_register_sequence(rdev,
1478                                                  bonaire_mgcg_cgcg_init,
1479                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1480                 radeon_program_register_sequence(rdev,
1481                                                  bonaire_golden_registers,
1482                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1483                 radeon_program_register_sequence(rdev,
1484                                                  bonaire_golden_common_registers,
1485                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1486                 radeon_program_register_sequence(rdev,
1487                                                  bonaire_golden_spm_registers,
1488                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1489                 break;
1490         case CHIP_KABINI:
1491                 radeon_program_register_sequence(rdev,
1492                                                  kalindi_mgcg_cgcg_init,
1493                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1494                 radeon_program_register_sequence(rdev,
1495                                                  kalindi_golden_registers,
1496                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1497                 radeon_program_register_sequence(rdev,
1498                                                  kalindi_golden_common_registers,
1499                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1500                 radeon_program_register_sequence(rdev,
1501                                                  kalindi_golden_spm_registers,
1502                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1503                 break;
1504         case CHIP_KAVERI:
1505                 radeon_program_register_sequence(rdev,
1506                                                  spectre_mgcg_cgcg_init,
1507                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1508                 radeon_program_register_sequence(rdev,
1509                                                  spectre_golden_registers,
1510                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1511                 radeon_program_register_sequence(rdev,
1512                                                  spectre_golden_common_registers,
1513                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1514                 radeon_program_register_sequence(rdev,
1515                                                  spectre_golden_spm_registers,
1516                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1517                 break;
1518         case CHIP_HAWAII:
1519                 radeon_program_register_sequence(rdev,
1520                                                  hawaii_mgcg_cgcg_init,
1521                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1522                 radeon_program_register_sequence(rdev,
1523                                                  hawaii_golden_registers,
1524                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1525                 radeon_program_register_sequence(rdev,
1526                                                  hawaii_golden_common_registers,
1527                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1528                 radeon_program_register_sequence(rdev,
1529                                                  hawaii_golden_spm_registers,
1530                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1531                 break;
1532         default:
1533                 break;
1534         }
1535 }
1536
1537 /**
1538  * cik_get_xclk - get the xclk
1539  *
1540  * @rdev: radeon_device pointer
1541  *
1542  * Returns the reference clock used by the gfx engine
1543  * (CIK).
1544  */
1545 u32 cik_get_xclk(struct radeon_device *rdev)
1546 {
1547         u32 reference_clock = rdev->clock.spll.reference_freq;
1548
1549         if (rdev->flags & RADEON_IS_IGP) {
1550                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1551                         return reference_clock / 2;
1552         } else {
1553                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1554                         return reference_clock / 4;
1555         }
1556         return reference_clock;
1557 }
1558
1559 /**
1560  * cik_mm_rdoorbell - read a doorbell dword
1561  *
1562  * @rdev: radeon_device pointer
1563  * @offset: byte offset into the aperture
1564  *
1565  * Returns the value in the doorbell aperture at the
1566  * requested offset (CIK).
1567  */
1568 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1569 {
1570         if (offset < rdev->doorbell.size) {
1571                 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1572         } else {
1573                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1574                 return 0;
1575         }
1576 }
1577
1578 /**
1579  * cik_mm_wdoorbell - write a doorbell dword
1580  *
1581  * @rdev: radeon_device pointer
1582  * @offset: byte offset into the aperture
1583  * @v: value to write
1584  *
1585  * Writes @v to the doorbell aperture at the
1586  * requested offset (CIK).
1587  */
1588 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1589 {
1590         if (offset < rdev->doorbell.size) {
1591                 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1592         } else {
1593                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1594         }
1595 }
1596
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO settings: {MC register index, value} pairs written to the
 * memory controller before loading the MC firmware (see
 * ci_mc_load_microcode()). */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1638
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC IO settings: {MC register index, value} pairs written to the
 * memory controller before loading the MC firmware. */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1666
1667
1668 /**
1669  * cik_srbm_select - select specific register instances
1670  *
1671  * @rdev: radeon_device pointer
1672  * @me: selected ME (micro engine)
1673  * @pipe: pipe
1674  * @queue: queue
1675  * @vmid: VMID
1676  *
1677  * Switches the currently active registers instances.  Some
1678  * registers are instanced per VMID, others are instanced per
1679  * me/pipe/queue combination.
1680  */
1681 static void cik_srbm_select(struct radeon_device *rdev,
1682                             u32 me, u32 pipe, u32 queue, u32 vmid)
1683 {
1684         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1685                              MEID(me & 0x3) |
1686                              VMID(vmid & 0xf) |
1687                              QUEUEID(queue & 0x7));
1688         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1689 }
1690
1691 /* ucode loading */
1692 /**
1693  * ci_mc_load_microcode - load MC ucode into the hw
1694  *
1695  * @rdev: radeon_device pointer
1696  *
1697  * Load the GDDR MC ucode into the hw (CIK).
1698  * Returns 0 on success, error on failure.
1699  */
1700 static int ci_mc_load_microcode(struct radeon_device *rdev)
1701 {
1702         const __be32 *fw_data;
1703         u32 running, blackout = 0;
1704         u32 *io_mc_regs;
1705         int i, ucode_size, regs_size;
1706
1707         if (!rdev->mc_fw)
1708                 return -EINVAL;
1709
1710         switch (rdev->family) {
1711         case CHIP_BONAIRE:
1712                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1713                 ucode_size = CIK_MC_UCODE_SIZE;
1714                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1715                 break;
1716         case CHIP_HAWAII:
1717                 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1718                 ucode_size = HAWAII_MC_UCODE_SIZE;
1719                 regs_size = HAWAII_IO_MC_REGS_SIZE;
1720                 break;
1721         default:
1722                 return -EINVAL;
1723         }
1724
1725         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1726
1727         if (running == 0) {
1728                 if (running) {
1729                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1730                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1731                 }
1732
1733                 /* reset the engine and set to writable */
1734                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1735                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1736
1737                 /* load mc io regs */
1738                 for (i = 0; i < regs_size; i++) {
1739                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1740                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1741                 }
1742                 /* load the MC ucode */
1743                 fw_data = (const __be32 *)rdev->mc_fw->data;
1744                 for (i = 0; i < ucode_size; i++)
1745                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1746
1747                 /* put the engine back into the active state */
1748                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1749                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1750                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1751
1752                 /* wait for training to complete */
1753                 for (i = 0; i < rdev->usec_timeout; i++) {
1754                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1755                                 break;
1756                         udelay(1);
1757                 }
1758                 for (i = 0; i < rdev->usec_timeout; i++) {
1759                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1760                                 break;
1761                         udelay(1);
1762                 }
1763
1764                 if (running)
1765                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1766         }
1767
1768         return 0;
1769 }
1770
1771 /**
1772  * cik_init_microcode - load ucode images from disk
1773  *
1774  * @rdev: radeon_device pointer
1775  *
1776  * Use the firmware interface to load the ucode images into
1777  * the driver (not loaded into hw).
1778  * Returns 0 on success, error on failure.
1779  */
1780 static int cik_init_microcode(struct radeon_device *rdev)
1781 {
1782         const char *chip_name;
1783         size_t pfp_req_size, me_req_size, ce_req_size,
1784                 mec_req_size, rlc_req_size, mc_req_size = 0,
1785                 sdma_req_size, smc_req_size = 0;
1786         char fw_name[30];
1787         int err;
1788
1789         DRM_DEBUG("\n");
1790
1791         switch (rdev->family) {
1792         case CHIP_BONAIRE:
1793                 chip_name = "BONAIRE";
1794                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1795                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1796                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1797                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1798                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1799                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1800                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1801                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1802                 break;
1803         case CHIP_HAWAII:
1804                 chip_name = "HAWAII";
1805                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1806                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1807                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1808                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1809                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1810                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1811                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1812                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1813                 break;
1814         case CHIP_KAVERI:
1815                 chip_name = "KAVERI";
1816                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1817                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1818                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1819                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1820                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1821                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1822                 break;
1823         case CHIP_KABINI:
1824                 chip_name = "KABINI";
1825                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1826                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1827                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1828                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1829                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1830                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1831                 break;
1832         default: BUG();
1833         }
1834
1835         DRM_INFO("Loading %s Microcode\n", chip_name);
1836
1837         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1838         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1839         if (err)
1840                 goto out;
1841         if (rdev->pfp_fw->size != pfp_req_size) {
1842                 printk(KERN_ERR
1843                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1844                        rdev->pfp_fw->size, fw_name);
1845                 err = -EINVAL;
1846                 goto out;
1847         }
1848
1849         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1850         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1851         if (err)
1852                 goto out;
1853         if (rdev->me_fw->size != me_req_size) {
1854                 printk(KERN_ERR
1855                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1856                        rdev->me_fw->size, fw_name);
1857                 err = -EINVAL;
1858         }
1859
1860         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1861         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1862         if (err)
1863                 goto out;
1864         if (rdev->ce_fw->size != ce_req_size) {
1865                 printk(KERN_ERR
1866                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1867                        rdev->ce_fw->size, fw_name);
1868                 err = -EINVAL;
1869         }
1870
1871         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1872         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1873         if (err)
1874                 goto out;
1875         if (rdev->mec_fw->size != mec_req_size) {
1876                 printk(KERN_ERR
1877                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1878                        rdev->mec_fw->size, fw_name);
1879                 err = -EINVAL;
1880         }
1881
1882         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1883         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1884         if (err)
1885                 goto out;
1886         if (rdev->rlc_fw->size != rlc_req_size) {
1887                 printk(KERN_ERR
1888                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1889                        rdev->rlc_fw->size, fw_name);
1890                 err = -EINVAL;
1891         }
1892
1893         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1894         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1895         if (err)
1896                 goto out;
1897         if (rdev->sdma_fw->size != sdma_req_size) {
1898                 printk(KERN_ERR
1899                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1900                        rdev->sdma_fw->size, fw_name);
1901                 err = -EINVAL;
1902         }
1903
1904         /* No SMC, MC ucode on APUs */
1905         if (!(rdev->flags & RADEON_IS_IGP)) {
1906                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1907                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1908                 if (err)
1909                         goto out;
1910                 if (rdev->mc_fw->size != mc_req_size) {
1911                         printk(KERN_ERR
1912                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1913                                rdev->mc_fw->size, fw_name);
1914                         err = -EINVAL;
1915                 }
1916
1917                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1918                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1919                 if (err) {
1920                         printk(KERN_ERR
1921                                "smc: error loading firmware \"%s\"\n",
1922                                fw_name);
1923                         release_firmware(rdev->smc_fw);
1924                         rdev->smc_fw = NULL;
1925                         err = 0;
1926                 } else if (rdev->smc_fw->size != smc_req_size) {
1927                         printk(KERN_ERR
1928                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1929                                rdev->smc_fw->size, fw_name);
1930                         err = -EINVAL;
1931                 }
1932         }
1933
1934 out:
1935         if (err) {
1936                 if (err != -EINVAL)
1937                         printk(KERN_ERR
1938                                "cik_cp: Failed to load firmware \"%s\"\n",
1939                                fw_name);
1940                 release_firmware(rdev->pfp_fw);
1941                 rdev->pfp_fw = NULL;
1942                 release_firmware(rdev->me_fw);
1943                 rdev->me_fw = NULL;
1944                 release_firmware(rdev->ce_fw);
1945                 rdev->ce_fw = NULL;
1946                 release_firmware(rdev->rlc_fw);
1947                 rdev->rlc_fw = NULL;
1948                 release_firmware(rdev->mc_fw);
1949                 rdev->mc_fw = NULL;
1950                 release_firmware(rdev->smc_fw);
1951                 rdev->smc_fw = NULL;
1952         }
1953         return err;
1954 }
1955
1956 /*
1957  * Core functions
1958  */
1959 /**
1960  * cik_tiling_mode_table_init - init the hw tiling table
1961  *
1962  * @rdev: radeon_device pointer
1963  *
1964  * Starting with SI, the tiling setup is done globally in a
1965  * set of 32 tiling modes.  Rather than selecting each set of
1966  * parameters per surface as on older asics, we just select
1967  * which index in the tiling table we want to use, and the
1968  * surface uses those parameters (CIK).
1969  */
1970 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1971 {
1972         const u32 num_tile_mode_states = 32;
1973         const u32 num_secondary_tile_mode_states = 16;
1974         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1975         u32 num_pipe_configs;
1976         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1977                 rdev->config.cik.max_shader_engines;
1978
1979         switch (rdev->config.cik.mem_row_size_in_kb) {
1980         case 1:
1981                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1982                 break;
1983         case 2:
1984         default:
1985                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1986                 break;
1987         case 4:
1988                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1989                 break;
1990         }
1991
1992         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1993         if (num_pipe_configs > 8)
1994                 num_pipe_configs = 16;
1995
1996         if (num_pipe_configs == 16) {
1997                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1998                         switch (reg_offset) {
1999                         case 0:
2000                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2003                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2004                                 break;
2005                         case 1:
2006                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2009                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2010                                 break;
2011                         case 2:
2012                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2015                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2016                                 break;
2017                         case 3:
2018                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2020                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2021                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2022                                 break;
2023                         case 4:
2024                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2025                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2027                                                  TILE_SPLIT(split_equal_to_row_size));
2028                                 break;
2029                         case 5:
2030                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2031                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2032                                 break;
2033                         case 6:
2034                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2035                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2036                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2037                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2038                                 break;
2039                         case 7:
2040                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2041                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2042                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2043                                                  TILE_SPLIT(split_equal_to_row_size));
2044                                 break;
2045                         case 8:
2046                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2047                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2048                                 break;
2049                         case 9:
2050                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2052                                 break;
2053                         case 10:
2054                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2055                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2057                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058                                 break;
2059                         case 11:
2060                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2062                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2063                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2064                                 break;
2065                         case 12:
2066                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2067                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2068                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2069                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2070                                 break;
2071                         case 13:
2072                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2073                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2074                                 break;
2075                         case 14:
2076                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2077                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080                                 break;
2081                         case 16:
2082                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2083                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2084                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2085                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086                                 break;
2087                         case 17:
2088                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2091                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092                                 break;
2093                         case 27:
2094                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2096                                 break;
2097                         case 28:
2098                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2101                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102                                 break;
2103                         case 29:
2104                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2105                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2106                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2107                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2108                                 break;
2109                         case 30:
2110                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2111                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2112                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2113                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2114                                 break;
2115                         default:
2116                                 gb_tile_moden = 0;
2117                                 break;
2118                         }
2119                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2120                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2121                 }
2122                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2123                         switch (reg_offset) {
2124                         case 0:
2125                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2127                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2128                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2129                                 break;
2130                         case 1:
2131                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2132                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2133                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2134                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2135                                 break;
2136                         case 2:
2137                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2138                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2139                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2140                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2141                                 break;
2142                         case 3:
2143                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2144                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2145                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2146                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2147                                 break;
2148                         case 4:
2149                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2151                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2152                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2153                                 break;
2154                         case 5:
2155                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2157                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2158                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2159                                 break;
2160                         case 6:
2161                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2165                                 break;
2166                         case 8:
2167                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2171                                 break;
2172                         case 9:
2173                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2177                                 break;
2178                         case 10:
2179                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2182                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2183                                 break;
2184                         case 11:
2185                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2189                                 break;
2190                         case 12:
2191                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2194                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2195                                 break;
2196                         case 13:
2197                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2200                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2201                                 break;
2202                         case 14:
2203                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2207                                 break;
2208                         default:
2209                                 gb_tile_moden = 0;
2210                                 break;
2211                         }
2212                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2213                 }
2214         } else if (num_pipe_configs == 8) {
2215                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2216                         switch (reg_offset) {
2217                         case 0:
2218                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2220                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2221                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2222                                 break;
2223                         case 1:
2224                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2226                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2227                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2228                                 break;
2229                         case 2:
2230                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2232                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2233                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2234                                 break;
2235                         case 3:
2236                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2238                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2239                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2240                                 break;
2241                         case 4:
2242                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2244                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2245                                                  TILE_SPLIT(split_equal_to_row_size));
2246                                 break;
2247                         case 5:
2248                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250                                 break;
2251                         case 6:
2252                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2253                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2254                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2255                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2256                                 break;
2257                         case 7:
2258                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2259                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2260                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2261                                                  TILE_SPLIT(split_equal_to_row_size));
2262                                 break;
2263                         case 8:
2264                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2265                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2266                                 break;
2267                         case 9:
2268                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2270                                 break;
2271                         case 10:
2272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2274                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2275                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276                                 break;
2277                         case 11:
2278                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2280                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2281                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282                                 break;
2283                         case 12:
2284                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2285                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2287                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288                                 break;
2289                         case 13:
2290                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2292                                 break;
2293                         case 14:
2294                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2296                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2297                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298                                 break;
2299                         case 16:
2300                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2301                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2303                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304                                 break;
2305                         case 17:
2306                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2307                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310                                 break;
2311                         case 27:
2312                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2314                                 break;
2315                         case 28:
2316                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2319                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320                                 break;
2321                         case 29:
2322                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326                                 break;
2327                         case 30:
2328                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2329                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2330                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332                                 break;
2333                         default:
2334                                 gb_tile_moden = 0;
2335                                 break;
2336                         }
2337                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2338                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2339                 }
2340                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2341                         switch (reg_offset) {
2342                         case 0:
2343                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2346                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2347                                 break;
2348                         case 1:
2349                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2353                                 break;
2354                         case 2:
2355                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2359                                 break;
2360                         case 3:
2361                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2365                                 break;
2366                         case 4:
2367                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2371                                 break;
2372                         case 5:
2373                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2375                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2376                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2377                                 break;
2378                         case 6:
2379                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2383                                 break;
2384                         case 8:
2385                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2386                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2387                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2389                                 break;
2390                         case 9:
2391                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2395                                 break;
2396                         case 10:
2397                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2399                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2401                                 break;
2402                         case 11:
2403                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2406                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2407                                 break;
2408                         case 12:
2409                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2411                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2412                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2413                                 break;
2414                         case 13:
2415                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2417                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2418                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2419                                 break;
2420                         case 14:
2421                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2425                                 break;
2426                         default:
2427                                 gb_tile_moden = 0;
2428                                 break;
2429                         }
2430                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2431                 }
2432         } else if (num_pipe_configs == 4) {
2433                 if (num_rbs == 4) {
2434                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2435                                 switch (reg_offset) {
2436                                 case 0:
2437                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2438                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2439                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2440                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2441                                         break;
2442                                 case 1:
2443                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2445                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2447                                         break;
2448                                 case 2:
2449                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2451                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2452                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2453                                         break;
2454                                 case 3:
2455                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2456                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2457                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2458                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2459                                         break;
2460                                 case 4:
2461                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2462                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2463                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2464                                                          TILE_SPLIT(split_equal_to_row_size));
2465                                         break;
2466                                 case 5:
2467                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2468                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2469                                         break;
2470                                 case 6:
2471                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2472                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2473                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2474                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2475                                         break;
2476                                 case 7:
2477                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2478                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2479                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2480                                                          TILE_SPLIT(split_equal_to_row_size));
2481                                         break;
2482                                 case 8:
2483                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2484                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2485                                         break;
2486                                 case 9:
2487                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2488                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2489                                         break;
2490                                 case 10:
2491                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2492                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2493                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2494                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2495                                         break;
2496                                 case 11:
2497                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2498                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2499                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2500                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2501                                         break;
2502                                 case 12:
2503                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2504                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2505                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2506                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2507                                         break;
2508                                 case 13:
2509                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2510                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2511                                         break;
2512                                 case 14:
2513                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2515                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2516                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517                                         break;
2518                                 case 16:
2519                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2520                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2521                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2522                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2523                                         break;
2524                                 case 17:
2525                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2526                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2527                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2528                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2529                                         break;
2530                                 case 27:
2531                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2532                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2533                                         break;
2534                                 case 28:
2535                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2536                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2537                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2538                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2539                                         break;
2540                                 case 29:
2541                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2543                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2544                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2545                                         break;
2546                                 case 30:
2547                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2548                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2549                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2550                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2551                                         break;
2552                                 default:
2553                                         gb_tile_moden = 0;
2554                                         break;
2555                                 }
2556                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2557                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2558                         }
2559                 } else if (num_rbs < 4) {
2560                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2561                                 switch (reg_offset) {
2562                                 case 0:
2563                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2565                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2566                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2567                                         break;
2568                                 case 1:
2569                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2570                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2571                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2572                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2573                                         break;
2574                                 case 2:
2575                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2577                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2578                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2579                                         break;
2580                                 case 3:
2581                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2583                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2584                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2585                                         break;
2586                                 case 4:
2587                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2588                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2589                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2590                                                          TILE_SPLIT(split_equal_to_row_size));
2591                                         break;
2592                                 case 5:
2593                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2594                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2595                                         break;
2596                                 case 6:
2597                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2598                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2599                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2600                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2601                                         break;
2602                                 case 7:
2603                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2604                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2606                                                          TILE_SPLIT(split_equal_to_row_size));
2607                                         break;
2608                                 case 8:
2609                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2610                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2611                                         break;
2612                                 case 9:
2613                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2614                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2615                                         break;
2616                                 case 10:
2617                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2619                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2620                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621                                         break;
2622                                 case 11:
2623                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2624                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2625                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2626                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627                                         break;
2628                                 case 12:
2629                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2630                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2631                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2632                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2633                                         break;
2634                                 case 13:
2635                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2636                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2637                                         break;
2638                                 case 14:
2639                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2640                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2641                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2642                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643                                         break;
2644                                 case 16:
2645                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2646                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2647                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2648                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2649                                         break;
2650                                 case 17:
2651                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2652                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2653                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2654                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2655                                         break;
2656                                 case 27:
2657                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2658                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2659                                         break;
2660                                 case 28:
2661                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2662                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2663                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2664                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2665                                         break;
2666                                 case 29:
2667                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2668                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2669                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2670                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2671                                         break;
2672                                 case 30:
2673                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2674                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2675                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2676                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2677                                         break;
2678                                 default:
2679                                         gb_tile_moden = 0;
2680                                         break;
2681                                 }
2682                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2683                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2684                         }
2685                 }
2686                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2687                         switch (reg_offset) {
2688                         case 0:
2689                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2690                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2691                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2692                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2693                                 break;
2694                         case 1:
2695                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2696                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2697                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2698                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2699                                 break;
2700                         case 2:
2701                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2703                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2704                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2705                                 break;
2706                         case 3:
2707                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2709                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2710                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2711                                 break;
2712                         case 4:
2713                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2715                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2716                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2717                                 break;
2718                         case 5:
2719                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2721                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2722                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2723                                 break;
2724                         case 6:
2725                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2728                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2729                                 break;
2730                         case 8:
2731                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2732                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2733                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2734                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2735                                 break;
2736                         case 9:
2737                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2738                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2739                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2740                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2741                                 break;
2742                         case 10:
2743                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2745                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2747                                 break;
2748                         case 11:
2749                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2751                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2752                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2753                                 break;
2754                         case 12:
2755                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2757                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2758                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2759                                 break;
2760                         case 13:
2761                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2763                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2764                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2765                                 break;
2766                         case 14:
2767                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2770                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2771                                 break;
2772                         default:
2773                                 gb_tile_moden = 0;
2774                                 break;
2775                         }
2776                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2777                 }
2778         } else if (num_pipe_configs == 2) {
2779                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2780                         switch (reg_offset) {
2781                         case 0:
2782                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2783                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2784                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2785                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2786                                 break;
2787                         case 1:
2788                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2790                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2791                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2792                                 break;
2793                         case 2:
2794                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2796                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2797                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2798                                 break;
2799                         case 3:
2800                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2801                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2802                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2803                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2804                                 break;
2805                         case 4:
2806                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2807                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2808                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2809                                                  TILE_SPLIT(split_equal_to_row_size));
2810                                 break;
2811                         case 5:
2812                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2813                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2814                                 break;
2815                         case 6:
2816                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2817                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2818                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2819                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2820                                 break;
2821                         case 7:
2822                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2823                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2824                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2825                                                  TILE_SPLIT(split_equal_to_row_size));
2826                                 break;
2827                         case 8:
2828                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2829                                 break;
2830                         case 9:
2831                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2832                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2833                                 break;
2834                         case 10:
2835                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2837                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2838                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2839                                 break;
2840                         case 11:
2841                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2842                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2843                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2844                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2845                                 break;
2846                         case 12:
2847                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2848                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2849                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2850                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851                                 break;
2852                         case 13:
2853                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2854                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2855                                 break;
2856                         case 14:
2857                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2858                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2859                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2860                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2861                                 break;
2862                         case 16:
2863                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2864                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2865                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2866                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867                                 break;
2868                         case 17:
2869                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2870                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2871                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2872                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2873                                 break;
2874                         case 27:
2875                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2876                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2877                                 break;
2878                         case 28:
2879                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2880                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2881                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2882                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2883                                 break;
2884                         case 29:
2885                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2886                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2887                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2888                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2889                                 break;
2890                         case 30:
2891                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2892                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2893                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2894                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2895                                 break;
2896                         default:
2897                                 gb_tile_moden = 0;
2898                                 break;
2899                         }
2900                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2901                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2902                 }
2903                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2904                         switch (reg_offset) {
2905                         case 0:
2906                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2907                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2908                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2909                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2910                                 break;
2911                         case 1:
2912                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2913                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2914                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2915                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2916                                 break;
2917                         case 2:
2918                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2920                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2921                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2922                                 break;
2923                         case 3:
2924                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2925                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2926                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2927                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2928                                 break;
2929                         case 4:
2930                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2931                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2932                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2933                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2934                                 break;
2935                         case 5:
2936                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2937                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2938                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2939                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2940                                 break;
2941                         case 6:
2942                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2943                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2944                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2945                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2946                                 break;
2947                         case 8:
2948                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2949                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2950                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2951                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2952                                 break;
2953                         case 9:
2954                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2955                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2956                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2957                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2958                                 break;
2959                         case 10:
2960                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2964                                 break;
2965                         case 11:
2966                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2967                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2968                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2969                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2970                                 break;
2971                         case 12:
2972                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2974                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2976                                 break;
2977                         case 13:
2978                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2979                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2980                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2981                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2982                                 break;
2983                         case 14:
2984                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2986                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2988                                 break;
2989                         default:
2990                                 gb_tile_moden = 0;
2991                                 break;
2992                         }
2993                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2994                 }
2995         } else
2996                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2997 }
2998
2999 /**
3000  * cik_select_se_sh - select which SE, SH to address
3001  *
3002  * @rdev: radeon_device pointer
3003  * @se_num: shader engine to address
3004  * @sh_num: sh block to address
3005  *
3006  * Select which SE, SH combinations to address. Certain
3007  * registers are instanced per SE or SH.  0xffffffff means
3008  * broadcast to all SEs or SHs (CIK).
3009  */
3010 static void cik_select_se_sh(struct radeon_device *rdev,
3011                              u32 se_num, u32 sh_num)
3012 {
3013         u32 data = INSTANCE_BROADCAST_WRITES;
3014
3015         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3016                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3017         else if (se_num == 0xffffffff)
3018                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3019         else if (sh_num == 0xffffffff)
3020                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3021         else
3022                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3023         WREG32(GRBM_GFX_INDEX, data);
3024 }
3025
3026 /**
3027  * cik_create_bitmask - create a bitmask
3028  *
3029  * @bit_width: length of the mask
3030  *
3031  * create a variable length bit mask (CIK).
3032  * Returns the bitmask.
3033  */
3034 static u32 cik_create_bitmask(u32 bit_width)
3035 {
3036         u32 i, mask = 0;
3037
3038         for (i = 0; i < bit_width; i++) {
3039                 mask <<= 1;
3040                 mask |= 1;
3041         }
3042         return mask;
3043 }
3044
3045 /**
 * cik_get_rb_disabled - compute the mask of disabled render backends (RBs)
3047  *
3048  * @rdev: radeon_device pointer
3049  * @max_rb_num: max RBs (render backends) for the asic
3050  * @se_num: number of SEs (shader engines) for the asic
3051  * @sh_per_se: number of SH blocks per SE for the asic
3052  *
3053  * Calculates the bitmask of disabled RBs (CIK).
3054  * Returns the disabled RB bitmask.
3055  */
3056 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3057                               u32 max_rb_num, u32 se_num,
3058                               u32 sh_per_se)
3059 {
3060         u32 data, mask;
3061
3062         data = RREG32(CC_RB_BACKEND_DISABLE);
3063         if (data & 1)
3064                 data &= BACKEND_DISABLE_MASK;
3065         else
3066                 data = 0;
3067         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3068
3069         data >>= BACKEND_DISABLE_SHIFT;
3070
3071         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
3072
3073         return data & mask;
3074 }
3075
3076 /**
3077  * cik_setup_rb - setup the RBs on the asic
3078  *
3079  * @rdev: radeon_device pointer
3080  * @se_num: number of SEs (shader engines) for the asic
3081  * @sh_per_se: number of SH blocks per SE for the asic
3082  * @max_rb_num: max RBs (render backends) for the asic
3083  *
3084  * Configures per-SE/SH RB registers (CIK).
3085  */
static void cik_setup_rb(struct radeon_device *rdev,
                         u32 se_num, u32 sh_per_se,
                         u32 max_rb_num)
{
        int i, j;
        u32 data, mask;
        u32 disabled_rbs = 0;
        u32 enabled_rbs = 0;

        /* Walk every SE/SH pair and pack its disabled-RB bits into one
         * global bitmap.  Hawaii uses a wider per-SH bit field than the
         * other CIK parts, hence the family check.
         */
        for (i = 0; i < se_num; i++) {
                for (j = 0; j < sh_per_se; j++) {
                        cik_select_se_sh(rdev, i, j);
                        data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
                        if (rdev->family == CHIP_HAWAII)
                                disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
                        else
                                disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
                }
        }
        /* Restore broadcast addressing before any further register writes. */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

        /* Invert the disabled bitmap: set a bit for each RB (up to
         * max_rb_num) that is NOT disabled.
         */
        mask = 1;
        for (i = 0; i < max_rb_num; i++) {
                if (!(disabled_rbs & mask))
                        enabled_rbs |= mask;
                mask <<= 1;
        }

        /* Program PA_SC_RASTER_CONFIG per SE.  Two enabled_rbs bits are
         * consumed per SH to select the packer->RB mapping for that SH.
         */
        for (i = 0; i < se_num; i++) {
                cik_select_se_sh(rdev, i, 0xffffffff);
                data = 0;
                for (j = 0; j < sh_per_se; j++) {
                        switch (enabled_rbs & 3) {
                        case 0:
                                /* neither RB of the pair enabled; map the
                                 * packer appropriately (first SH vs rest) */
                                if (j == 0)
                                        data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
                                else
                                        data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
                                break;
                        case 1:
                                /* only the first RB of the pair enabled */
                                data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
                                break;
                        case 2:
                                /* only the second RB of the pair enabled */
                                data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
                                break;
                        case 3:
                        default:
                                /* both RBs enabled */
                                data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
                                break;
                        }
                        enabled_rbs >>= 2;
                }
                WREG32(PA_SC_RASTER_CONFIG, data);
        }
        /* Leave the GFX index in broadcast mode for subsequent init code. */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3142
3143 /**
3144  * cik_gpu_init - setup the 3D engine
3145  *
3146  * @rdev: radeon_device pointer
3147  *
3148  * Configures the 3D engine and tiling configuration
3149  * registers so that the 3D engine is usable.
3150  */
3151 static void cik_gpu_init(struct radeon_device *rdev)
3152 {
3153         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3154         u32 mc_shared_chmap, mc_arb_ramcfg;
3155         u32 hdp_host_path_cntl;
3156         u32 tmp;
3157         int i, j;
3158
3159         switch (rdev->family) {
3160         case CHIP_BONAIRE:
3161                 rdev->config.cik.max_shader_engines = 2;
3162                 rdev->config.cik.max_tile_pipes = 4;
3163                 rdev->config.cik.max_cu_per_sh = 7;
3164                 rdev->config.cik.max_sh_per_se = 1;
3165                 rdev->config.cik.max_backends_per_se = 2;
3166                 rdev->config.cik.max_texture_channel_caches = 4;
3167                 rdev->config.cik.max_gprs = 256;
3168                 rdev->config.cik.max_gs_threads = 32;
3169                 rdev->config.cik.max_hw_contexts = 8;
3170
3171                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3172                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3173                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3174                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3175                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3176                 break;
3177         case CHIP_HAWAII:
3178                 rdev->config.cik.max_shader_engines = 4;
3179                 rdev->config.cik.max_tile_pipes = 16;
3180                 rdev->config.cik.max_cu_per_sh = 11;
3181                 rdev->config.cik.max_sh_per_se = 1;
3182                 rdev->config.cik.max_backends_per_se = 4;
3183                 rdev->config.cik.max_texture_channel_caches = 16;
3184                 rdev->config.cik.max_gprs = 256;
3185                 rdev->config.cik.max_gs_threads = 32;
3186                 rdev->config.cik.max_hw_contexts = 8;
3187
3188                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3189                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3190                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3191                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3192                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3193                 break;
3194         case CHIP_KAVERI:
3195                 rdev->config.cik.max_shader_engines = 1;
3196                 rdev->config.cik.max_tile_pipes = 4;
3197                 if ((rdev->pdev->device == 0x1304) ||
3198                     (rdev->pdev->device == 0x1305) ||
3199                     (rdev->pdev->device == 0x130C) ||
3200                     (rdev->pdev->device == 0x130F) ||
3201                     (rdev->pdev->device == 0x1310) ||
3202                     (rdev->pdev->device == 0x1311) ||
3203                     (rdev->pdev->device == 0x131C)) {
3204                         rdev->config.cik.max_cu_per_sh = 8;
3205                         rdev->config.cik.max_backends_per_se = 2;
3206                 } else if ((rdev->pdev->device == 0x1309) ||
3207                            (rdev->pdev->device == 0x130A) ||
3208                            (rdev->pdev->device == 0x130D) ||
3209                            (rdev->pdev->device == 0x1313) ||
3210                            (rdev->pdev->device == 0x131D)) {
3211                         rdev->config.cik.max_cu_per_sh = 6;
3212                         rdev->config.cik.max_backends_per_se = 2;
3213                 } else if ((rdev->pdev->device == 0x1306) ||
3214                            (rdev->pdev->device == 0x1307) ||
3215                            (rdev->pdev->device == 0x130B) ||
3216                            (rdev->pdev->device == 0x130E) ||
3217                            (rdev->pdev->device == 0x1315) ||
3218                            (rdev->pdev->device == 0x131B)) {
3219                         rdev->config.cik.max_cu_per_sh = 4;
3220                         rdev->config.cik.max_backends_per_se = 1;
3221                 } else {
3222                         rdev->config.cik.max_cu_per_sh = 3;
3223                         rdev->config.cik.max_backends_per_se = 1;
3224                 }
3225                 rdev->config.cik.max_sh_per_se = 1;
3226                 rdev->config.cik.max_texture_channel_caches = 4;
3227                 rdev->config.cik.max_gprs = 256;
3228                 rdev->config.cik.max_gs_threads = 16;
3229                 rdev->config.cik.max_hw_contexts = 8;
3230
3231                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3232                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3233                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3234                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3235                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3236                 break;
3237         case CHIP_KABINI:
3238         default:
3239                 rdev->config.cik.max_shader_engines = 1;
3240                 rdev->config.cik.max_tile_pipes = 2;
3241                 rdev->config.cik.max_cu_per_sh = 2;
3242                 rdev->config.cik.max_sh_per_se = 1;
3243                 rdev->config.cik.max_backends_per_se = 1;
3244                 rdev->config.cik.max_texture_channel_caches = 2;
3245                 rdev->config.cik.max_gprs = 256;
3246                 rdev->config.cik.max_gs_threads = 16;
3247                 rdev->config.cik.max_hw_contexts = 8;
3248
3249                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3250                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3251                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3252                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3253                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3254                 break;
3255         }
3256
3257         /* Initialize HDP */
3258         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3259                 WREG32((0x2c14 + j), 0x00000000);
3260                 WREG32((0x2c18 + j), 0x00000000);
3261                 WREG32((0x2c1c + j), 0x00000000);
3262                 WREG32((0x2c20 + j), 0x00000000);
3263                 WREG32((0x2c24 + j), 0x00000000);
3264         }
3265
3266         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3267
3268         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3269
3270         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3271         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3272
3273         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3274         rdev->config.cik.mem_max_burst_length_bytes = 256;
3275         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3276         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3277         if (rdev->config.cik.mem_row_size_in_kb > 4)
3278                 rdev->config.cik.mem_row_size_in_kb = 4;
3279         /* XXX use MC settings? */
3280         rdev->config.cik.shader_engine_tile_size = 32;
3281         rdev->config.cik.num_gpus = 1;
3282         rdev->config.cik.multi_gpu_tile_size = 64;
3283
3284         /* fix up row size */
3285         gb_addr_config &= ~ROW_SIZE_MASK;
3286         switch (rdev->config.cik.mem_row_size_in_kb) {
3287         case 1:
3288         default:
3289                 gb_addr_config |= ROW_SIZE(0);
3290                 break;
3291         case 2:
3292                 gb_addr_config |= ROW_SIZE(1);
3293                 break;
3294         case 4:
3295                 gb_addr_config |= ROW_SIZE(2);
3296                 break;
3297         }
3298
3299         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3300          * not have bank info, so create a custom tiling dword.
3301          * bits 3:0   num_pipes
3302          * bits 7:4   num_banks
3303          * bits 11:8  group_size
3304          * bits 15:12 row_size
3305          */
3306         rdev->config.cik.tile_config = 0;
3307         switch (rdev->config.cik.num_tile_pipes) {
3308         case 1:
3309                 rdev->config.cik.tile_config |= (0 << 0);
3310                 break;
3311         case 2:
3312                 rdev->config.cik.tile_config |= (1 << 0);
3313                 break;
3314         case 4:
3315                 rdev->config.cik.tile_config |= (2 << 0);
3316                 break;
3317         case 8:
3318         default:
3319                 /* XXX what about 12? */
3320                 rdev->config.cik.tile_config |= (3 << 0);
3321                 break;
3322         }
3323         rdev->config.cik.tile_config |=
3324                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3325         rdev->config.cik.tile_config |=
3326                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3327         rdev->config.cik.tile_config |=
3328                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3329
3330         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3331         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3332         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3333         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3334         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3335         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3336         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3337         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3338
3339         cik_tiling_mode_table_init(rdev);
3340
3341         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3342                      rdev->config.cik.max_sh_per_se,
3343                      rdev->config.cik.max_backends_per_se);
3344
3345         /* set HW defaults for 3D engine */
3346         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3347
3348         WREG32(SX_DEBUG_1, 0x20);
3349
3350         WREG32(TA_CNTL_AUX, 0x00010000);
3351
3352         tmp = RREG32(SPI_CONFIG_CNTL);
3353         tmp |= 0x03000000;
3354         WREG32(SPI_CONFIG_CNTL, tmp);
3355
3356         WREG32(SQ_CONFIG, 1);
3357
3358         WREG32(DB_DEBUG, 0);
3359
3360         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3361         tmp |= 0x00000400;
3362         WREG32(DB_DEBUG2, tmp);
3363
3364         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3365         tmp |= 0x00020200;
3366         WREG32(DB_DEBUG3, tmp);
3367
3368         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3369         tmp |= 0x00018208;
3370         WREG32(CB_HW_CONTROL, tmp);
3371
3372         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3373
3374         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3375                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3376                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3377                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3378
3379         WREG32(VGT_NUM_INSTANCES, 1);
3380
3381         WREG32(CP_PERFMON_CNTL, 0);
3382
3383         WREG32(SQ_CONFIG, 0);
3384
3385         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3386                                           FORCE_EOV_MAX_REZ_CNT(255)));
3387
3388         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3389                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3390
3391         WREG32(VGT_GS_VERTEX_REUSE, 16);
3392         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3393
3394         tmp = RREG32(HDP_MISC_CNTL);
3395         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3396         WREG32(HDP_MISC_CNTL, tmp);
3397
3398         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3399         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3400
3401         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3402         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3403
3404         udelay(50);
3405 }
3406
3407 /*
3408  * GPU scratch registers helpers function.
3409  */
3410 /**
3411  * cik_scratch_init - setup driver info for CP scratch regs
3412  *
3413  * @rdev: radeon_device pointer
3414  *
3415  * Set up the number and offset of the CP scratch registers.
3416  * NOTE: use of CP scratch registers is a legacy inferface and
3417  * is not used by default on newer asics (r6xx+).  On newer asics,
3418  * memory buffers are used for fences rather than scratch regs.
3419  */
3420 static void cik_scratch_init(struct radeon_device *rdev)
3421 {
3422         int i;
3423
3424         rdev->scratch.num_reg = 7;
3425         rdev->scratch.reg_base = SCRATCH_REG0;
3426         for (i = 0; i < rdev->scratch.num_reg; i++) {
3427                 rdev->scratch.free[i] = true;
3428                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3429         }
3430 }
3431
3432 /**
3433  * cik_ring_test - basic gfx ring test
3434  *
3435  * @rdev: radeon_device pointer
3436  * @ring: radeon_ring structure holding ring information
3437  *
3438  * Allocate a scratch register and write to it using the gfx ring (CIK).
3439  * Provides a basic gfx ring test to verify that the ring is working.
3440  * Used by cik_cp_gfx_resume();
3441  * Returns 0 on success, error on failure.
3442  */
3443 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3444 {
3445         uint32_t scratch;
3446         uint32_t tmp = 0;
3447         unsigned i;
3448         int r;
3449
3450         r = radeon_scratch_get(rdev, &scratch);
3451         if (r) {
3452                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3453                 return r;
3454         }
3455         WREG32(scratch, 0xCAFEDEAD);
3456         r = radeon_ring_lock(rdev, ring, 3);
3457         if (r) {
3458                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3459                 radeon_scratch_free(rdev, scratch);
3460                 return r;
3461         }
3462         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3463         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3464         radeon_ring_write(ring, 0xDEADBEEF);
3465         radeon_ring_unlock_commit(rdev, ring);
3466
3467         for (i = 0; i < rdev->usec_timeout; i++) {
3468                 tmp = RREG32(scratch);
3469                 if (tmp == 0xDEADBEEF)
3470                         break;
3471                 DRM_UDELAY(1);
3472         }
3473         if (i < rdev->usec_timeout) {
3474                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3475         } else {
3476                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3477                           ring->idx, scratch, tmp);
3478                 r = -EINVAL;
3479         }
3480         radeon_scratch_free(rdev, scratch);
3481         return r;
3482 }
3483
3484 /**
3485  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3486  *
3487  * @rdev: radeon_device pointer
3488  * @fence: radeon fence object
3489  *
3490  * Emits a fence sequnce number on the gfx ring and flushes
3491  * GPU caches.
3492  */
3493 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3494                              struct radeon_fence *fence)
3495 {
3496         struct radeon_ring *ring = &rdev->ring[fence->ring];
3497         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3498
3499         /* EVENT_WRITE_EOP - flush caches, send int */
3500         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3501         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3502                                  EOP_TC_ACTION_EN |
3503                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3504                                  EVENT_INDEX(5)));
3505         radeon_ring_write(ring, addr & 0xfffffffc);
3506         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3507         radeon_ring_write(ring, fence->seq);
3508         radeon_ring_write(ring, 0);
3509         /* HDP flush */
3510         /* We should be using the new WAIT_REG_MEM special op packet here
3511          * but it causes the CP to hang
3512          */
3513         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3514         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3515                                  WRITE_DATA_DST_SEL(0)));
3516         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3517         radeon_ring_write(ring, 0);
3518         radeon_ring_write(ring, 0);
3519 }
3520
3521 /**
3522  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3523  *
3524  * @rdev: radeon_device pointer
3525  * @fence: radeon fence object
3526  *
3527  * Emits a fence sequnce number on the compute ring and flushes
3528  * GPU caches.
3529  */
3530 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3531                                  struct radeon_fence *fence)
3532 {
3533         struct radeon_ring *ring = &rdev->ring[fence->ring];
3534         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3535
3536         /* RELEASE_MEM - flush caches, send int */
3537         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3538         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3539                                  EOP_TC_ACTION_EN |
3540                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3541                                  EVENT_INDEX(5)));
3542         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3543         radeon_ring_write(ring, addr & 0xfffffffc);
3544         radeon_ring_write(ring, upper_32_bits(addr));
3545         radeon_ring_write(ring, fence->seq);
3546         radeon_ring_write(ring, 0);
3547         /* HDP flush */
3548         /* We should be using the new WAIT_REG_MEM special op packet here
3549          * but it causes the CP to hang
3550          */
3551         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3552         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3553                                  WRITE_DATA_DST_SEL(0)));
3554         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3555         radeon_ring_write(ring, 0);
3556         radeon_ring_write(ring, 0);
3557 }
3558
/**
 * cik_semaphore_ring_emit - emit a semaphore command on a CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the semaphore command on
 * @semaphore: semaphore object holding the GPU address to operate on
 * @emit_wait: true emits a wait, false emits a signal
 *
 * Emits a MEM_SEMAPHORE packet that either signals the semaphore
 * or waits on it, depending on @emit_wait (CIK).
 */
void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* the high dword carries the wait/signal select flags */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}
3571
3572 /**
3573  * cik_copy_cpdma - copy pages using the CP DMA engine
3574  *
3575  * @rdev: radeon_device pointer
3576  * @src_offset: src GPU address
3577  * @dst_offset: dst GPU address
3578  * @num_gpu_pages: number of GPU pages to xfer
3579  * @fence: radeon fence object
3580  *
3581  * Copy GPU paging using the CP DMA engine (CIK+).
3582  * Used by the radeon ttm implementation to move pages if
3583  * registered as the asic copy callback.
3584  */
3585 int cik_copy_cpdma(struct radeon_device *rdev,
3586                    uint64_t src_offset, uint64_t dst_offset,
3587                    unsigned num_gpu_pages,
3588                    struct radeon_fence **fence)
3589 {
3590         struct radeon_semaphore *sem = NULL;
3591         int ring_index = rdev->asic->copy.blit_ring_index;
3592         struct radeon_ring *ring = &rdev->ring[ring_index];
3593         u32 size_in_bytes, cur_size_in_bytes, control;
3594         int i, num_loops;
3595         int r = 0;
3596
3597         r = radeon_semaphore_create(rdev, &sem);
3598         if (r) {
3599                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3600                 return r;
3601         }
3602
3603         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3604         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3605         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3606         if (r) {
3607                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3608                 radeon_semaphore_free(rdev, &sem, NULL);
3609                 return r;
3610         }
3611
3612         if (radeon_fence_need_sync(*fence, ring->idx)) {
3613                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3614                                             ring->idx);
3615                 radeon_fence_note_sync(*fence, ring->idx);
3616         } else {
3617                 radeon_semaphore_free(rdev, &sem, NULL);
3618         }
3619
3620         for (i = 0; i < num_loops; i++) {
3621                 cur_size_in_bytes = size_in_bytes;
3622                 if (cur_size_in_bytes > 0x1fffff)
3623                         cur_size_in_bytes = 0x1fffff;
3624                 size_in_bytes -= cur_size_in_bytes;
3625                 control = 0;
3626                 if (size_in_bytes == 0)
3627                         control |= PACKET3_DMA_DATA_CP_SYNC;
3628                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3629                 radeon_ring_write(ring, control);
3630                 radeon_ring_write(ring, lower_32_bits(src_offset));
3631                 radeon_ring_write(ring, upper_32_bits(src_offset));
3632                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3633                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3634                 radeon_ring_write(ring, cur_size_in_bytes);
3635                 src_offset += cur_size_in_bytes;
3636                 dst_offset += cur_size_in_bytes;
3637         }
3638
3639         r = radeon_fence_emit(rdev, fence, ring->idx);
3640         if (r) {
3641                 radeon_ring_unlock_undo(rdev, ring);
3642                 return r;
3643         }
3644
3645         radeon_ring_unlock_commit(rdev, ring);
3646         radeon_semaphore_free(rdev, &sem, *fence);
3647
3648         return r;
3649 }
3650
3651 /*
3652  * IB stuff
3653  */
3654 /**
3655  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3656  *
3657  * @rdev: radeon_device pointer
3658  * @ib: radeon indirect buffer object
3659  *
3660  * Emits an DE (drawing engine) or CE (constant engine) IB
3661  * on the gfx ring.  IBs are usually generated by userspace
3662  * acceleration drivers and submitted to the kernel for
3663  * sheduling on the ring.  This function schedules the IB
3664  * on the gfx ring for execution by the GPU.
3665  */
3666 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3667 {
3668         struct radeon_ring *ring = &rdev->ring[ib->ring];
3669         u32 header, control = INDIRECT_BUFFER_VALID;
3670
3671         if (ib->is_const_ib) {
3672                 /* set switch buffer packet before const IB */
3673                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3674                 radeon_ring_write(ring, 0);
3675
3676                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3677         } else {
3678                 u32 next_rptr;
3679                 if (ring->rptr_save_reg) {
3680                         next_rptr = ring->wptr + 3 + 4;
3681                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3682                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3683                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3684                         radeon_ring_write(ring, next_rptr);
3685                 } else if (rdev->wb.enabled) {
3686                         next_rptr = ring->wptr + 5 + 4;
3687                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3688                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3689                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3690                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3691                         radeon_ring_write(ring, next_rptr);
3692                 }
3693
3694                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3695         }
3696
3697         control |= ib->length_dw |
3698                 (ib->vm ? (ib->vm->id << 24) : 0);
3699
3700         radeon_ring_write(ring, header);
3701         radeon_ring_write(ring,
3702 #ifdef __BIG_ENDIAN
3703                           (2 << 0) |
3704 #endif
3705                           (ib->gpu_addr & 0xFFFFFFFC));
3706         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3707         radeon_ring_write(ring, control);
3708 }
3709
3710 /**
3711  * cik_ib_test - basic gfx ring IB test
3712  *
3713  * @rdev: radeon_device pointer
3714  * @ring: radeon_ring structure holding ring information
3715  *
3716  * Allocate an IB and execute it on the gfx ring (CIK).
3717  * Provides a basic gfx ring test to verify that IBs are working.
3718  * Returns 0 on success, error on failure.
3719  */
3720 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3721 {
3722         struct radeon_ib ib;
3723         uint32_t scratch;
3724         uint32_t tmp = 0;
3725         unsigned i;
3726         int r;
3727
3728         r = radeon_scratch_get(rdev, &scratch);
3729         if (r) {
3730                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3731                 return r;
3732         }
3733         WREG32(scratch, 0xCAFEDEAD);
3734         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3735         if (r) {
3736                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3737                 radeon_scratch_free(rdev, scratch);
3738                 return r;
3739         }
3740         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3741         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3742         ib.ptr[2] = 0xDEADBEEF;
3743         ib.length_dw = 3;
3744         r = radeon_ib_schedule(rdev, &ib, NULL);
3745         if (r) {
3746                 radeon_scratch_free(rdev, scratch);
3747                 radeon_ib_free(rdev, &ib);
3748                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3749                 return r;
3750         }
3751         r = radeon_fence_wait(ib.fence, false);
3752         if (r) {
3753                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3754                 radeon_scratch_free(rdev, scratch);
3755                 radeon_ib_free(rdev, &ib);
3756                 return r;
3757         }
3758         for (i = 0; i < rdev->usec_timeout; i++) {
3759                 tmp = RREG32(scratch);
3760                 if (tmp == 0xDEADBEEF)
3761                         break;
3762                 DRM_UDELAY(1);
3763         }
3764         if (i < rdev->usec_timeout) {
3765                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3766         } else {
3767                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3768                           scratch, tmp);
3769                 r = -EINVAL;
3770         }
3771         radeon_scratch_free(rdev, scratch);
3772         radeon_ib_free(rdev, &ib);
3773         return r;
3774 }
3775
3776 /*
3777  * CP.
3778  * On CIK, gfx and compute now have independant command processors.
3779  *
3780  * GFX
3781  * Gfx consists of a single ring and can process both gfx jobs and
3782  * compute jobs.  The gfx CP consists of three microengines (ME):
3783  * PFP - Pre-Fetch Parser
3784  * ME - Micro Engine
3785  * CE - Constant Engine
3786  * The PFP and ME make up what is considered the Drawing Engine (DE).
3787  * The CE is an asynchronous engine used for updating buffer desciptors
3788  * used by the DE so that they can be loaded into cache in parallel
3789  * while the DE is processing state update packets.
3790  *
3791  * Compute
3792  * The compute CP consists of two microengines (ME):
3793  * MEC1 - Compute MicroEngine 1
3794  * MEC2 - Compute MicroEngine 2
3795  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3796  * The queues are exposed to userspace and are programmed directly
3797  * by the compute runtime.
3798  */
3799 /**
3800  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3801  *
3802  * @rdev: radeon_device pointer
3803  * @enable: enable or disable the MEs
3804  *
3805  * Halts or unhalts the gfx MEs.
3806  */
3807 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3808 {
3809         if (enable)
3810                 WREG32(CP_ME_CNTL, 0);
3811         else {
3812                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3813                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3814         }
3815         udelay(50);
3816 }
3817
3818 /**
3819  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3820  *
3821  * @rdev: radeon_device pointer
3822  *
3823  * Loads the gfx PFP, ME, and CE ucode.
3824  * Returns 0 for success, -EINVAL if the ucode is not available.
3825  */
3826 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3827 {
3828         const __be32 *fw_data;
3829         int i;
3830
3831         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3832                 return -EINVAL;
3833
3834         cik_cp_gfx_enable(rdev, false);
3835
3836         /* PFP */
3837         fw_data = (const __be32 *)rdev->pfp_fw->data;
3838         WREG32(CP_PFP_UCODE_ADDR, 0);
3839         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3840                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3841         WREG32(CP_PFP_UCODE_ADDR, 0);
3842
3843         /* CE */
3844         fw_data = (const __be32 *)rdev->ce_fw->data;
3845         WREG32(CP_CE_UCODE_ADDR, 0);
3846         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3847                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3848         WREG32(CP_CE_UCODE_ADDR, 0);
3849
3850         /* ME */
3851         fw_data = (const __be32 *)rdev->me_fw->data;
3852         WREG32(CP_ME_RAM_WADDR, 0);
3853         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3854                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3855         WREG32(CP_ME_RAM_WADDR, 0);
3856
3857         WREG32(CP_PFP_UCODE_ADDR, 0);
3858         WREG32(CP_CE_UCODE_ADDR, 0);
3859         WREG32(CP_ME_RAM_WADDR, 0);
3860         WREG32(CP_ME_RAM_RADDR, 0);
3861         return 0;
3862 }
3863
3864 /**
3865  * cik_cp_gfx_start - start the gfx ring
3866  *
3867  * @rdev: radeon_device pointer
3868  *
3869  * Enables the ring and loads the clear state context and other
3870  * packets required to init the ring.
3871  * Returns 0 for success, error for failure.
3872  */
3873 static int cik_cp_gfx_start(struct radeon_device *rdev)
3874 {
3875         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3876         int r, i;
3877
3878         /* init the CP */
3879         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3880         WREG32(CP_ENDIAN_SWAP, 0);
3881         WREG32(CP_DEVICE_ID, 1);
3882
3883         cik_cp_gfx_enable(rdev, true);
3884
3885         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3886         if (r) {
3887                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3888                 return r;
3889         }
3890
3891         /* init the CE partitions.  CE only used for gfx on CIK */
3892         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3893         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3894         radeon_ring_write(ring, 0xc000);
3895         radeon_ring_write(ring, 0xc000);
3896
3897         /* setup clear context state */
3898         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3899         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3900
3901         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3902         radeon_ring_write(ring, 0x80000000);
3903         radeon_ring_write(ring, 0x80000000);
3904
3905         for (i = 0; i < cik_default_size; i++)
3906                 radeon_ring_write(ring, cik_default_state[i]);
3907
3908         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3909         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3910
3911         /* set clear context state */
3912         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3913         radeon_ring_write(ring, 0);
3914
3915         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3916         radeon_ring_write(ring, 0x00000316);
3917         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3918         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3919
3920         radeon_ring_unlock_commit(rdev, ring);
3921
3922         return 0;
3923 }
3924
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the CP before freeing the ring it may still be fetching from */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
3938
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2 of the size in dwords);
	 * bits 15:8 hold the RPTR report block size.
	 */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	/* re-write CNTL without RB_RPTR_WR_ENA to latch the pointers */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
4012
4013 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
4014                               struct radeon_ring *ring)
4015 {
4016         u32 rptr;
4017
4018
4019
4020         if (rdev->wb.enabled) {
4021                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
4022         } else {
4023                 mutex_lock(&rdev->srbm_mutex);
4024                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4025                 rptr = RREG32(CP_HQD_PQ_RPTR);
4026                 cik_srbm_select(rdev, 0, 0, 0, 0);
4027                 mutex_unlock(&rdev->srbm_mutex);
4028         }
4029
4030         return rptr;
4031 }
4032
4033 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
4034                               struct radeon_ring *ring)
4035 {
4036         u32 wptr;
4037
4038         if (rdev->wb.enabled) {
4039                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
4040         } else {
4041                 mutex_lock(&rdev->srbm_mutex);
4042                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4043                 wptr = RREG32(CP_HQD_PQ_WPTR);
4044                 cik_srbm_select(rdev, 0, 0, 0, 0);
4045                 mutex_unlock(&rdev->srbm_mutex);
4046         }
4047
4048         return wptr;
4049 }
4050
/**
 * cik_compute_ring_set_wptr - commit the compute ring write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 *
 * Mirrors the new write pointer into the writeback page and then
 * rings the queue's doorbell so the CP fetches the new commands.
 */
void cik_compute_ring_set_wptr(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
	WDOORBELL32(ring->doorbell_offset, ring->wptr);
}
4057
4058 /**
4059  * cik_cp_compute_enable - enable/disable the compute CP MEs
4060  *
4061  * @rdev: radeon_device pointer
4062  * @enable: enable or disable the MEs
4063  *
4064  * Halts or unhalts the compute MEs.
4065  */
4066 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4067 {
4068         if (enable)
4069                 WREG32(CP_MEC_CNTL, 0);
4070         else
4071                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4072         udelay(50);
4073 }
4074
4075 /**
4076  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4077  *
4078  * @rdev: radeon_device pointer
4079  *
4080  * Loads the compute MEC1&2 ucode.
4081  * Returns 0 for success, -EINVAL if the ucode is not available.
4082  */
4083 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4084 {
4085         const __be32 *fw_data;
4086         int i;
4087
4088         if (!rdev->mec_fw)
4089                 return -EINVAL;
4090
4091         cik_cp_compute_enable(rdev, false);
4092
4093         /* MEC1 */
4094         fw_data = (const __be32 *)rdev->mec_fw->data;
4095         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4096         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4097                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4098         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4099
4100         if (rdev->family == CHIP_KAVERI) {
4101                 /* MEC2 */
4102                 fw_data = (const __be32 *)rdev->mec_fw->data;
4103                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4104                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4105                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4106                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4107         }
4108
4109         return 0;
4110 }
4111
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* just unhalt the MECs; per-queue setup happens in
	 * cik_cp_compute_resume()
	 */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4126
4127 /**
4128  * cik_cp_compute_fini - stop the compute queues
4129  *
4130  * @rdev: radeon_device pointer
4131  *
4132  * Stop the compute queues and tear down the driver queue
4133  * info.
4134  */
4135 static void cik_cp_compute_fini(struct radeon_device *rdev)
4136 {
4137         int i, idx, r;
4138
4139         cik_cp_compute_enable(rdev, false);
4140
4141         for (i = 0; i < 2; i++) {
4142                 if (i == 0)
4143                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4144                 else
4145                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4146
4147                 if (rdev->ring[idx].mqd_obj) {
4148                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4149                         if (unlikely(r != 0))
4150                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4151
4152                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4153                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4154
4155                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4156                         rdev->ring[idx].mqd_obj = NULL;
4157                 }
4158         }
4159 }
4160
4161 static void cik_mec_fini(struct radeon_device *rdev)
4162 {
4163         int r;
4164
4165         if (rdev->mec.hpd_eop_obj) {
4166                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4167                 if (unlikely(r != 0))
4168                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4169                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4170                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4171
4172                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4173                 rdev->mec.hpd_eop_obj = NULL;
4174         }
4175 }
4176
4177 #define MEC_HPD_SIZE 2048
4178
/* Allocate, pin and clear the HPD EOP buffer shared by all MEC pipes.
 * Called before the compute queues are brought up; undone by
 * cik_mec_fini() on any failure.
 * Returns 0 for success, error for failure.
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	/* one HPD EOP region (x2) per pipe; allocated once and reused on resume */
	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4234
/* Shadow of the CP hardware queue descriptor (HQD) register block,
 * embedded in struct bonaire_mqd which lives in GPU-visible memory
 * (see cik_cp_compute_resume()).  Do NOT reorder or resize fields:
 * the layout is consumed by the CP, not just by the driver.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4273
/* Memory Queue Descriptor (MQD) for Bonaire-class compute queues.
 * One instance per compute ring is written into a pinned GTT BO by
 * cik_cp_compute_resume().  Do NOT reorder or resize fields: the
 * layout is read by the CP firmware.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* register shadow block the driver programs into the CP_HQD_* regs */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4301
4302 /**
4303  * cik_cp_compute_resume - setup the compute queue registers
4304  *
4305  * @rdev: radeon_device pointer
4306  *
4307  * Program the compute queues and test them to make sure they
4308  * are working.
4309  * Returns 0 for success, error for failure.
4310  */
4311 static int cik_cp_compute_resume(struct radeon_device *rdev)
4312 {
4313         int r, i, idx;
4314         u32 tmp;
4315         bool use_doorbell = true;
4316         u64 hqd_gpu_addr;
4317         u64 mqd_gpu_addr;
4318         u64 eop_gpu_addr;
4319         u64 wb_gpu_addr;
4320         u32 *buf;
4321         struct bonaire_mqd *mqd;
4322
4323         r = cik_cp_compute_start(rdev);
4324         if (r)
4325                 return r;
4326
4327         /* fix up chicken bits */
4328         tmp = RREG32(CP_CPF_DEBUG);
4329         tmp |= (1 << 23);
4330         WREG32(CP_CPF_DEBUG, tmp);
4331
4332         /* init the pipes */
4333         mutex_lock(&rdev->srbm_mutex);
4334         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4335                 int me = (i < 4) ? 1 : 2;
4336                 int pipe = (i < 4) ? i : (i - 4);
4337
4338                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4339
4340                 cik_srbm_select(rdev, me, pipe, 0, 0);
4341
4342                 /* write the EOP addr */
4343                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4344                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4345
4346                 /* set the VMID assigned */
4347                 WREG32(CP_HPD_EOP_VMID, 0);
4348
4349                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4350                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4351                 tmp &= ~EOP_SIZE_MASK;
4352                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4353                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4354         }
4355         cik_srbm_select(rdev, 0, 0, 0, 0);
4356         mutex_unlock(&rdev->srbm_mutex);
4357
4358         /* init the queues.  Just two for now. */
4359         for (i = 0; i < 2; i++) {
4360                 if (i == 0)
4361                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4362                 else
4363                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4364
4365                 if (rdev->ring[idx].mqd_obj == NULL) {
4366                         r = radeon_bo_create(rdev,
4367                                              sizeof(struct bonaire_mqd),
4368                                              PAGE_SIZE, true,
4369                                              RADEON_GEM_DOMAIN_GTT, NULL,
4370                                              &rdev->ring[idx].mqd_obj);
4371                         if (r) {
4372                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4373                                 return r;
4374                         }
4375                 }
4376
4377                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4378                 if (unlikely(r != 0)) {
4379                         cik_cp_compute_fini(rdev);
4380                         return r;
4381                 }
4382                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4383                                   &mqd_gpu_addr);
4384                 if (r) {
4385                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4386                         cik_cp_compute_fini(rdev);
4387                         return r;
4388                 }
4389                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4390                 if (r) {
4391                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4392                         cik_cp_compute_fini(rdev);
4393                         return r;
4394                 }
4395
4396                 /* doorbell offset */
4397                 rdev->ring[idx].doorbell_offset =
4398                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
4399
4400                 /* init the mqd struct */
4401                 memset(buf, 0, sizeof(struct bonaire_mqd));
4402
4403                 mqd = (struct bonaire_mqd *)buf;
4404                 mqd->header = 0xC0310800;
4405                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4406                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4407                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4408                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4409
4410                 mutex_lock(&rdev->srbm_mutex);
4411                 cik_srbm_select(rdev, rdev->ring[idx].me,
4412                                 rdev->ring[idx].pipe,
4413                                 rdev->ring[idx].queue, 0);
4414
4415                 /* disable wptr polling */
4416                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4417                 tmp &= ~WPTR_POLL_EN;
4418                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4419
4420                 /* enable doorbell? */
4421                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4422                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4423                 if (use_doorbell)
4424                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4425                 else
4426                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4427                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4428                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4429
4430                 /* disable the queue if it's active */
4431                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4432                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4433                 mqd->queue_state.cp_hqd_pq_wptr= 0;
4434                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4435                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4436                         for (i = 0; i < rdev->usec_timeout; i++) {
4437                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4438                                         break;
4439                                 udelay(1);
4440                         }
4441                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4442                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4443                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4444                 }
4445
4446                 /* set the pointer to the MQD */
4447                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4448                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4449                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4450                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4451                 /* set MQD vmid to 0 */
4452                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4453                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4454                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4455
4456                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4457                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4458                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4459                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4460                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4461                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4462
4463                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4464                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4465                 mqd->queue_state.cp_hqd_pq_control &=
4466                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4467
4468                 mqd->queue_state.cp_hqd_pq_control |=
4469                         order_base_2(rdev->ring[idx].ring_size / 8);
4470                 mqd->queue_state.cp_hqd_pq_control |=
4471                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4472 #ifdef __BIG_ENDIAN
4473                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4474 #endif
4475                 mqd->queue_state.cp_hqd_pq_control &=
4476                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4477                 mqd->queue_state.cp_hqd_pq_control |=
4478                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4479                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4480
4481                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4482                 if (i == 0)
4483                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4484                 else
4485                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4486                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4487                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4488                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4489                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4490                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4491
4492                 /* set the wb address wether it's enabled or not */
4493                 if (i == 0)
4494                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4495                 else
4496                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4497                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4498                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4499                         upper_32_bits(wb_gpu_addr) & 0xffff;
4500                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4501                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4502                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4503                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4504
4505                 /* enable the doorbell if requested */
4506                 if (use_doorbell) {
4507                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4508                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4509                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4510                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4511                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
4512                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4513                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4514                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4515
4516                 } else {
4517                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4518                 }
4519                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4520                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4521
4522                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4523                 rdev->ring[idx].wptr = 0;
4524                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4525                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4526                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4527                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4528
4529                 /* set the vmid for the queue */
4530                 mqd->queue_state.cp_hqd_vmid = 0;
4531                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4532
4533                 /* activate the queue */
4534                 mqd->queue_state.cp_hqd_active = 1;
4535                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4536
4537                 cik_srbm_select(rdev, 0, 0, 0, 0);
4538                 mutex_unlock(&rdev->srbm_mutex);
4539
4540                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4541                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4542
4543                 rdev->ring[idx].ready = true;
4544                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4545                 if (r)
4546                         rdev->ring[idx].ready = false;
4547         }
4548
4549         return 0;
4550 }
4551
/* Enable or halt both the gfx CP and the compute MECs together. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4557
/* Load the gfx and compute CP microcode.
 * Returns 0 on success, error code from the first failing loader.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4571
/* Tear down the gfx ring and the compute queues. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4577
4578 static int cik_cp_resume(struct radeon_device *rdev)
4579 {
4580         int r;
4581
4582         cik_enable_gui_idle_interrupt(rdev, false);
4583
4584         r = cik_cp_load_microcode(rdev);
4585         if (r)
4586                 return r;
4587
4588         r = cik_cp_gfx_resume(rdev);
4589         if (r)
4590                 return r;
4591         r = cik_cp_compute_resume(rdev);
4592         if (r)
4593                 return r;
4594
4595         cik_enable_gui_idle_interrupt(rdev, true);
4596
4597         return 0;
4598 }
4599
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log.
 * Called from the soft-reset path to aid GPU hang diagnosis.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4639
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4720
4721 /**
4722  * cik_gpu_soft_reset - soft reset GPU
4723  *
4724  * @rdev: radeon_device pointer
4725  * @reset_mask: mask of which blocks to reset
4726  *
4727  * Soft reset the blocks specified in @reset_mask.
4728  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce the memory controller before resetting blocks that may
	 * have outstanding MC requests */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the generic RADEON_RESET_* mask into GRBM/SRBM
	 * soft-reset register bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* on IGPs the MC is shared with the CPU side - never soft reset it */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		/* read back to post the write before the delay */
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		/* read back to post the write before the delay */
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
4851
4852 /**
4853  * cik_asic_reset - soft reset GPU
4854  *
4855  * @rdev: radeon_device pointer
4856  *
4857  * Look up which blocks are hung and attempt
4858  * to reset them.
4859  * Returns 0 for success.
4860  */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* mark the engine hung in the BIOS scratch regs before resetting so
	 * the state is visible even if the reset itself wedges */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	cik_gpu_soft_reset(rdev, reset_mask);

	/* re-check: only clear the hung flag if the reset actually worked */
	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
4879
4880 /**
4881  * cik_gfx_is_lockup - check if the 3D engine is locked up
4882  *
4883  * @rdev: radeon_device pointer
4884  * @ring: radeon_ring structure holding ring information
4885  *
4886  * Check if the 3D engine is locked up (CIK).
4887  * Returns true if the engine is locked, false if not.
4888  */
4889 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4890 {
4891         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4892
4893         if (!(reset_mask & (RADEON_RESET_GFX |
4894                             RADEON_RESET_COMPUTE |
4895                             RADEON_RESET_CP))) {
4896                 radeon_ring_lockup_update(ring);
4897                 return false;
4898         }
4899         /* force CP activities */
4900         radeon_ring_force_activity(rdev, ring);
4901         return radeon_ring_test_lockup(rdev, ring);
4902 }
4903
4904 /* MC */
4905 /**
4906  * cik_mc_program - program the GPU memory controller
4907  *
4908  * @rdev: radeon_device pointer
4909  *
4910  * Set the location of vram, gart, and AGP in the GPU's
4911  * physical address space (CIK).
4912  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	/* NOTE(review): 0x2c14..0x2c24 with a 0x18 stride look like per-surface
	 * HDP registers - confirm against the register spec */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop MC clients (display etc.) before reprogramming the aperture */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	/* out-of-aperture accesses are redirected to the scratch page */
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the upper 16 bits, start in the lower 16,
	 * both in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused on CIK: program an empty (top < bottom) aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
4960
4961 /**
4962  * cik_mc_init - initialize the memory controller driver params
4963  *
4964  * @rdev: radeon_device pointer
4965  *
4966  * Look up the amount of vram, vram width, and decide how to place
4967  * vram and gart within the GPU's physical address space (CIK).
4968  * Returns 0 for success.
4969  */
4970 static int cik_mc_init(struct radeon_device *rdev)
4971 {
4972         u32 tmp;
4973         int chansize, numchan;
4974
4975         /* Get VRAM informations */
4976         rdev->mc.vram_is_ddr = true;
4977         tmp = RREG32(MC_ARB_RAMCFG);
4978         if (tmp & CHANSIZE_MASK) {
4979                 chansize = 64;
4980         } else {
4981                 chansize = 32;
4982         }
4983         tmp = RREG32(MC_SHARED_CHMAP);
4984         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4985         case 0:
4986         default:
4987                 numchan = 1;
4988                 break;
4989         case 1:
4990                 numchan = 2;
4991                 break;
4992         case 2:
4993                 numchan = 4;
4994                 break;
4995         case 3:
4996                 numchan = 8;
4997                 break;
4998         case 4:
4999                 numchan = 3;
5000                 break;
5001         case 5:
5002                 numchan = 6;
5003                 break;
5004         case 6:
5005                 numchan = 10;
5006                 break;
5007         case 7:
5008                 numchan = 12;
5009                 break;
5010         case 8:
5011                 numchan = 16;
5012                 break;
5013         }
5014         rdev->mc.vram_width = numchan * chansize;
5015         /* Could aper size report 0 ? */
5016         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5017         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5018         /* size in MB on si */
5019         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5020         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5021         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5022         si_vram_gtt_location(rdev, &rdev->mc);
5023         radeon_update_bandwidth_info(rdev);
5024
5025         return 0;
5026 }
5027
5028 /*
5029  * GART
5030  * VMID 0 is the physical GPU addresses as used by the kernel.
5031  * VMIDs 1-15 are used for userspace clients and are handled
5032  * by the radeon vm/hsa code.
5033  */
5034 /**
5035  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5036  *
5037  * @rdev: radeon_device pointer
5038  *
5039  * Flush the TLB for the VMID 0 page table (CIK).
5040  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache so pending CPU writes are visible before the
	 * TLB invalidate */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; bit 0 flushes VMID 0 only */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5049
5050 /**
5051  * cik_pcie_gart_enable - gart enable
5052  *
5053  * @rdev: radeon_device pointer
5054  *
5055  * This sets up the TLBs, programs the page tables for VMID0,
5056  * sets up the hw for VMIDs 1-15 which are allocated on
5057  * demand, and sets up the global locations for the LDS, GDS,
5058  * and GPUVM for FSA64 clients (CIK).
5059  * Returns 0 for success, errors for failure.
5060  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: the kernel's GART mapping, single-level page table */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): 0x15D4-0x15DC are undocumented here - presumably
	 * identity-mapping related; confirm against the register spec */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 1-7 and 8-15 have their base-address registers in two
	 * separate banks; point them all at the context0 table for now */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	/* two-level page tables; fault on any illegal access and raise an
	 * interrupt so cik_vm_decode_fault() can report it */
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		/* SH_MEM_* and the SDMA GFX virtual-address regs are banked
		 * per VMID; select each VMID in turn via SRBM */
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5185
5186 /**
5187  * cik_pcie_gart_disable - gart disable
5188  *
5189  * @rdev: radeon_device pointer
5190  *
5191  * This disables all VM page table (CIK).
5192  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: same as enable but without ENABLE_L1_TLB and
	 * the advanced driver model, i.e. physical addressing */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: ENABLE_L2_CACHE deliberately omitted */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5213
5214 /**
5215  * cik_pcie_gart_fini - vm fini callback
5216  *
5217  * @rdev: radeon_device pointer
5218  *
5219  * Tears down the driver GART/VM setup (CIK).
5220  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable the hw first, then release the table and driver state */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5227
5228 /* vm parser */
5229 /**
5230  * cik_ib_parse - vm ib_parse callback
5231  *
5232  * @rdev: radeon_device pointer
5233  * @ib: indirect buffer pointer
5234  *
5235  * CIK uses hw IB checking so this is a nop (CIK).
5236  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* intentionally a nop: CIK validates IBs in hardware */
	return 0;
}
5241
5242 /*
5243  * vm
5244  * VMID 0 is the physical GPU addresses as used by the kernel.
5245  * VMIDs 1-15 are used for userspace clients and are handled
5246  * by the radeon vm/hsa code.
5247  */
5248 /**
5249  * cik_vm_init - cik vm init callback
5250  *
5251  * @rdev: radeon_device pointer
5252  *
5253  * Inits cik specific vm parameters (number of VMs, base of vram for
5254  * VMIDs 1-15) (CIK).
5255  * Returns 0 for success.
5256  */
5257 int cik_vm_init(struct radeon_device *rdev)
5258 {
5259         /* number of VMs */
5260         rdev->vm_manager.nvm = 16;
5261         /* base offset of vram pages */
5262         if (rdev->flags & RADEON_IS_IGP) {
5263                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5264                 tmp <<= 22;
5265                 rdev->vm_manager.vram_base_offset = tmp;
5266         } else
5267                 rdev->vm_manager.vram_base_offset = 0;
5268
5269         return 0;
5270 }
5271
5272 /**
5273  * cik_vm_fini - cik vm fini callback
5274  *
5275  * @rdev: radeon_device pointer
5276  *
5277  * Tear down any asic specific VM setup (CIK).
5278  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty: no asic-specific VM teardown needed on CIK */
}
5282
5283 /**
5284  * cik_vm_decode_fault - print human readable fault info
5285  *
5286  * @rdev: radeon_device pointer
5287  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5288  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5289  *
5290  * Print human readable fault information (CIK).
5291  */
5292 static void cik_vm_decode_fault(struct radeon_device *rdev,
5293                                 u32 status, u32 addr, u32 mc_client)
5294 {
5295         u32 mc_id;
5296         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5297         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5298         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5299                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5300
5301         if (rdev->family == CHIP_HAWAII)
5302                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5303         else
5304                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5305
5306         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5307                protections, vmid, addr,
5308                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5309                block, mc_client, mc_id);
5310 }
5311
5312 /**
5313  * cik_vm_flush - cik vm flush using the CP
5314  *
5315  * @rdev: radeon_device pointer
5316  *
5317  * Update the page table base and flush the VM TLB
5318  * using the CP (CIK).
5319  */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* update the page directory base for this VMID; the base-address
	 * registers for VMIDs 1-7 and 8-15 live in two separate banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* SH_MEM_* are banked per VMID, so first select this VMID via
	 * SRBM_GFX_CNTL, program them, then switch back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * context...
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5393
5394 /*
5395  * RLC
5396  * The RLC is a multi-purpose microengine that handles a
5397  * variety of functions, the most important of which is
5398  * the interrupt controller.
5399  */
5400 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5401                                           bool enable)
5402 {
5403         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5404
5405         if (enable)
5406                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5407         else
5408                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5409         WREG32(CP_INT_CNTL_RING0, tmp);
5410 }
5411
5412 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5413 {
5414         u32 tmp;
5415
5416         tmp = RREG32(RLC_LB_CNTL);
5417         if (enable)
5418                 tmp |= LOAD_BALANCE_ENABLE;
5419         else
5420                 tmp &= ~LOAD_BALANCE_ENABLE;
5421         WREG32(RLC_LB_CNTL, tmp);
5422 }
5423
/* Poll until the RLC serdes masters report idle, per SE/SH and global. */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* per shader engine / shader array: wait for the CU masters */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the non-CU masters (SE/GC/TC) */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5448
5449 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5450 {
5451         u32 tmp;
5452
5453         tmp = RREG32(RLC_CNTL);
5454         if (tmp != rlc)
5455                 WREG32(RLC_CNTL, rlc);
5456 }
5457
/* Halt the RLC if running and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * later via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (bounded) for the RLC GPM to report idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5481
/* Request the RLC to enter safe mode and wait for the handshake. */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* post the enter-safe-mode request */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for gfx power and clocks to be reported on */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to ack the request (REQ bit cleared) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5502
5503 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5504 {
5505         u32 tmp;
5506
5507         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5508         WREG32(RLC_GPR_REG2, tmp);
5509 }
5510
5511 /**
5512  * cik_rlc_stop - stop the RLC ME
5513  *
5514  * @rdev: radeon_device pointer
5515  *
5516  * Halt the RLC ME (MicroEngine) (CIK).
5517  */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* halt the RLC microengine */
	WREG32(RLC_CNTL, 0);

	/* no RLC means no gui-idle interrupt handling */
	cik_enable_gui_idle_interrupt(rdev, false);

	/* wait for outstanding serdes traffic to drain */
	cik_wait_for_rlc_serdes(rdev);
}
5526
5527 /**
5528  * cik_rlc_start - start the RLC ME
5529  *
5530  * @rdev: radeon_device pointer
5531  *
5532  * Unhalt the RLC ME (MicroEngine) (CIK).
5533  */
static void cik_rlc_start(struct radeon_device *rdev)
{
	/* unhalt the RLC microengine */
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC a moment to come up */
	udelay(50);
}
5542
5543 /**
5544  * cik_rlc_resume - setup the RLC hw
5545  *
5546  * @rdev: radeon_device pointer
5547  *
5548  * Initialize the RLC registers, load the ucode,
5549  * and start the RLC (CIK).
5550  * Returns 0 for success, -EINVAL if the ucode is not available.
5551  */
5552 static int cik_rlc_resume(struct radeon_device *rdev)
5553 {
5554         u32 i, size, tmp;
5555         const __be32 *fw_data;
5556
5557         if (!rdev->rlc_fw)
5558                 return -EINVAL;
5559
5560         switch (rdev->family) {
5561         case CHIP_BONAIRE:
5562         case CHIP_HAWAII:
5563         default:
5564                 size = BONAIRE_RLC_UCODE_SIZE;
5565                 break;
5566         case CHIP_KAVERI:
5567                 size = KV_RLC_UCODE_SIZE;
5568                 break;
5569         case CHIP_KABINI:
5570                 size = KB_RLC_UCODE_SIZE;
5571                 break;
5572         }
5573
5574         cik_rlc_stop(rdev);
5575
5576         /* disable CG */
5577         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5578         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5579
5580         si_rlc_reset(rdev);
5581
5582         cik_init_pg(rdev);
5583
5584         cik_init_cg(rdev);
5585
5586         WREG32(RLC_LB_CNTR_INIT, 0);
5587         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5588
5589         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5590         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5591         WREG32(RLC_LB_PARAMS, 0x00600408);
5592         WREG32(RLC_LB_CNTL, 0x80000004);
5593
5594         WREG32(RLC_MC_CNTL, 0);
5595         WREG32(RLC_UCODE_CNTL, 0);
5596
5597         fw_data = (const __be32 *)rdev->rlc_fw->data;
5598                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5599         for (i = 0; i < size; i++)
5600                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5601         WREG32(RLC_GPM_UCODE_ADDR, 0);
5602
5603         /* XXX - find out what chips support lbpw */
5604         cik_enable_lbpw(rdev, false);
5605
5606         if (rdev->family == CHIP_BONAIRE)
5607                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5608
5609         cik_rlc_start(rdev);
5610
5611         return 0;
5612 }
5613
/* Enable/disable coarse-grain clock gating (CGCG/CGLS) for GFX.
 * The RLC must be halted while the serdes masks are programmed; the
 * sequence below (halt, program, cik_update_rlc) is order-dependent.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp, tmp2;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                cik_enable_gui_idle_interrupt(rdev, true);

                tmp = cik_halt_rlc(rdev);

                /* broadcast the serdes writes to all SEs/SHs */
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
                WREG32(RLC_SERDES_WR_CTRL, tmp2);

                cik_update_rlc(rdev, tmp);

                data |= CGCG_EN | CGLS_EN;
        } else {
                cik_enable_gui_idle_interrupt(rdev, false);

                /* repeated reads flush outstanding CB clock-gating state;
                 * NOTE(review): exact reason for four reads is not visible
                 * here - presumably a hw workaround, confirm before changing */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        /* only touch the register if something actually changed */
        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5649
/* Enable/disable medium-grain clock gating (MGCG) for GFX, including the
 * optional CP/RLC memory light-sleep and CGTS features gated by cg_flags.
 * The halt-RLC / serdes-write / cik_update_rlc sequences are order-dependent.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                                /* CP memory light sleep */
                                orig = data = RREG32(CP_MEM_SLP_CNTL);
                                data |= CP_MEM_LS_EN;
                                if (orig != data)
                                        WREG32(CP_MEM_SLP_CNTL, data);
                        }
                }

                /* clear the MGCG override bit (bit 1) */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data &= 0xfffffffd;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                tmp = cik_halt_rlc(rdev);

                /* broadcast the serdes writes to all SEs/SHs */
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
                WREG32(RLC_SERDES_WR_CTRL, data);

                cik_update_rlc(rdev, tmp);

                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
                        orig = data = RREG32(CGTS_SM_CTRL_REG);
                        data &= ~SM_MODE_MASK;
                        data |= SM_MODE(0x2);
                        data |= SM_MODE_ENABLE;
                        data &= ~CGTS_OVERRIDE;
                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_LS_OVERRIDE;
                        data &= ~ON_MONITOR_ADD_MASK;
                        data |= ON_MONITOR_ADD_EN;
                        data |= ON_MONITOR_ADD(0x96);
                        if (orig != data)
                                WREG32(CGTS_SM_CTRL_REG, data);
                }
        } else {
                /* force the MGCG override bit on to disable gating */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000002;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* turn off RLC and CP memory light sleep */
                data = RREG32(RLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_LS_EN) {
                        data &= ~RLC_MEM_LS_EN;
                        WREG32(RLC_MEM_SLP_CNTL, data);
                }

                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }

                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                tmp = cik_halt_rlc(rdev);

                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
                WREG32(RLC_SERDES_WR_CTRL, data);

                cik_update_rlc(rdev, tmp);
        }
}
5728
/* MC/ATC/VM registers that carry the MC_LS_ENABLE / MC_CG_ENABLE bits;
 * iterated by cik_enable_mc_ls() and cik_enable_mc_mgcg() below.
 */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
5741
5742 static void cik_enable_mc_ls(struct radeon_device *rdev,
5743                              bool enable)
5744 {
5745         int i;
5746         u32 orig, data;
5747
5748         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5749                 orig = data = RREG32(mc_cg_registers[i]);
5750                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5751                         data |= MC_LS_ENABLE;
5752                 else
5753                         data &= ~MC_LS_ENABLE;
5754                 if (data != orig)
5755                         WREG32(mc_cg_registers[i], data);
5756         }
5757 }
5758
5759 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5760                                bool enable)
5761 {
5762         int i;
5763         u32 orig, data;
5764
5765         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5766                 orig = data = RREG32(mc_cg_registers[i]);
5767                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5768                         data |= MC_CG_ENABLE;
5769                 else
5770                         data &= ~MC_CG_ENABLE;
5771                 if (data != orig)
5772                         WREG32(mc_cg_registers[i], data);
5773         }
5774 }
5775
5776 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5777                                  bool enable)
5778 {
5779         u32 orig, data;
5780
5781         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5782                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5783                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5784         } else {
5785                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5786                 data |= 0xff000000;
5787                 if (data != orig)
5788                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5789
5790                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5791                 data |= 0xff000000;
5792                 if (data != orig)
5793                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5794         }
5795 }
5796
5797 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5798                                  bool enable)
5799 {
5800         u32 orig, data;
5801
5802         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5803                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5804                 data |= 0x100;
5805                 if (orig != data)
5806                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5807
5808                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5809                 data |= 0x100;
5810                 if (orig != data)
5811                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5812         } else {
5813                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5814                 data &= ~0x100;
5815                 if (orig != data)
5816                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5817
5818                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5819                 data &= ~0x100;
5820                 if (orig != data)
5821                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5822         }
5823 }
5824
/* Enable/disable medium-grain clock gating for the UVD block. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
                                bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                /* NOTE(review): the read result is immediately overwritten
                 * with 0xfff - the read may only matter for its hw side
                 * effect (or be vestigial); confirm before removing it */
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data = 0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        } else {
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data &= ~0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        }
}
5850
5851 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5852                                bool enable)
5853 {
5854         u32 orig, data;
5855
5856         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5857
5858         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5859                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5860                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5861         else
5862                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5863                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5864
5865         if (orig != data)
5866                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5867 }
5868
5869 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5870                                 bool enable)
5871 {
5872         u32 orig, data;
5873
5874         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5875
5876         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5877                 data &= ~CLOCK_GATING_DIS;
5878         else
5879                 data |= CLOCK_GATING_DIS;
5880
5881         if (orig != data)
5882                 WREG32(HDP_HOST_PATH_CNTL, data);
5883 }
5884
5885 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5886                               bool enable)
5887 {
5888         u32 orig, data;
5889
5890         orig = data = RREG32(HDP_MEM_POWER_LS);
5891
5892         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5893                 data |= HDP_LS_ENABLE;
5894         else
5895                 data &= ~HDP_LS_ENABLE;
5896
5897         if (orig != data)
5898                 WREG32(HDP_MEM_POWER_LS, data);
5899 }
5900
/* Enable or disable clock gating for the blocks selected in @block.
 * For GFX, MGCG must be enabled before CGCG and disabled after it
 * (see the "order matters!" note below); the GUI idle interrupt is
 * masked around the transition.
 */
void cik_update_cg(struct radeon_device *rdev,
                   u32 block, bool enable)
{

        if (block & RADEON_CG_BLOCK_GFX) {
                cik_enable_gui_idle_interrupt(rdev, false);
                /* order matters! */
                if (enable) {
                        cik_enable_mgcg(rdev, true);
                        cik_enable_cgcg(rdev, true);
                } else {
                        cik_enable_cgcg(rdev, false);
                        cik_enable_mgcg(rdev, false);
                }
                cik_enable_gui_idle_interrupt(rdev, true);
        }

        if (block & RADEON_CG_BLOCK_MC) {
                /* MC gating is only touched on discrete parts */
                if (!(rdev->flags & RADEON_IS_IGP)) {
                        cik_enable_mc_mgcg(rdev, enable);
                        cik_enable_mc_ls(rdev, enable);
                }
        }

        if (block & RADEON_CG_BLOCK_SDMA) {
                cik_enable_sdma_mgcg(rdev, enable);
                cik_enable_sdma_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_BIF) {
                cik_enable_bif_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_UVD) {
                if (rdev->has_uvd)
                        cik_enable_uvd_mgcg(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_HDP) {
                cik_enable_hdp_mgcg(rdev, enable);
                cik_enable_hdp_ls(rdev, enable);
        }
}
5944
/* Enable clock gating on all supported blocks: GFX first, then the
 * remaining blocks in one batch.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

        if (rdev->has_uvd)
                si_init_uvd_internal_cg(rdev);

        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), true);
}
5959
/* Disable clock gating on all blocks; reverse order of cik_init_cg()
 * (non-GFX blocks first, GFX last).
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), false);

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
5970
5971 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5972                                           bool enable)
5973 {
5974         u32 data, orig;
5975
5976         orig = data = RREG32(RLC_PG_CNTL);
5977         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5978                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5979         else
5980                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5981         if (orig != data)
5982                 WREG32(RLC_PG_CNTL, data);
5983 }
5984
5985 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5986                                           bool enable)
5987 {
5988         u32 data, orig;
5989
5990         orig = data = RREG32(RLC_PG_CNTL);
5991         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5992                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5993         else
5994                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5995         if (orig != data)
5996                 WREG32(RLC_PG_CNTL, data);
5997 }
5998
5999 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6000 {
6001         u32 data, orig;
6002
6003         orig = data = RREG32(RLC_PG_CNTL);
6004         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6005                 data &= ~DISABLE_CP_PG;
6006         else
6007                 data |= DISABLE_CP_PG;
6008         if (orig != data)
6009                 WREG32(RLC_PG_CNTL, data);
6010 }
6011
6012 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6013 {
6014         u32 data, orig;
6015
6016         orig = data = RREG32(RLC_PG_CNTL);
6017         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6018                 data &= ~DISABLE_GDS_PG;
6019         else
6020                 data |= DISABLE_GDS_PG;
6021         if (orig != data)
6022                 WREG32(RLC_PG_CNTL, data);
6023 }
6024
/* Per-ME power-gating table size and offsets (in dwords) within the CP
 * ucode images; consumed by cik_init_cp_pg_table() below.
 */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
6028
/* Build the CP power-gating table in rdev->rlc.cp_table_ptr by copying
 * CP_ME_TABLE_SIZE dwords per microengine out of the CE/PFP/ME/MEC
 * firmware images (5 MEs on Kaveri, 4 elsewhere).  Firmware words are
 * big-endian; the table is stored little-endian.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        volatile u32 *dst_ptr;
        int me, i, max_me = 4;
        u32 bo_offset = 0;
        u32 table_offset;

        if (rdev->family == CHIP_KAVERI)
                max_me = 5;

        if (rdev->rlc.cp_table_ptr == NULL)
                return;

        /* write the cp table buffer */
        dst_ptr = rdev->rlc.cp_table_ptr;
        for (me = 0; me < max_me; me++) {
                /* pick the firmware image and table offset for this ME */
                if (me == 0) {
                        fw_data = (const __be32 *)rdev->ce_fw->data;
                        table_offset = CP_ME_TABLE_OFFSET;
                } else if (me == 1) {
                        fw_data = (const __be32 *)rdev->pfp_fw->data;
                        table_offset = CP_ME_TABLE_OFFSET;
                } else if (me == 2) {
                        fw_data = (const __be32 *)rdev->me_fw->data;
                        table_offset = CP_ME_TABLE_OFFSET;
                } else {
                        /* MEs 3+ all come from the MEC firmware */
                        fw_data = (const __be32 *)rdev->mec_fw->data;
                        table_offset = CP_MEC_TABLE_OFFSET;
                }

                for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
                        dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
                }
                bo_offset += CP_ME_TABLE_SIZE;
        }
}
6066
/* Enable/disable GFX coarse-grain power gating (GFX_PG_ENABLE plus the
 * RLC auto-PG machinery).
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
                                bool enable)
{
        u32 data, orig;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
                orig = data = RREG32(RLC_PG_CNTL);
                data |= GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data |= AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);
        } else {
                orig = data = RREG32(RLC_PG_CNTL);
                data &= ~GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data &= ~AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);

                /* NOTE(review): result unused - presumably the read itself
                 * forces the gfx block out of power gating; confirm before
                 * removing */
                data = RREG32(DB_RENDER_CONTROL);
        }
}
6096
6097 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6098 {
6099         u32 mask = 0, tmp, tmp1;
6100         int i;
6101
6102         cik_select_se_sh(rdev, se, sh);
6103         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6104         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6105         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6106
6107         tmp &= 0xffff0000;
6108
6109         tmp |= tmp1;
6110         tmp >>= 16;
6111
6112         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6113                 mask <<= 1;
6114                 mask |= 1;
6115         }
6116
6117         return (~tmp) & mask;
6118 }
6119
6120 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6121 {
6122         u32 i, j, k, active_cu_number = 0;
6123         u32 mask, counter, cu_bitmap;
6124         u32 tmp = 0;
6125
6126         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6127                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6128                         mask = 1;
6129                         cu_bitmap = 0;
6130                         counter = 0;
6131                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6132                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6133                                         if (counter < 2)
6134                                                 cu_bitmap |= mask;
6135                                         counter ++;
6136                                 }
6137                                 mask <<= 1;
6138                         }
6139
6140                         active_cu_number += counter;
6141                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6142                 }
6143         }
6144
6145         WREG32(RLC_PG_AO_CU_MASK, tmp);
6146
6147         tmp = RREG32(RLC_MAX_PG_CU);
6148         tmp &= ~MAX_PU_CU_MASK;
6149         tmp |= MAX_PU_CU(active_cu_number);
6150         WREG32(RLC_MAX_PG_CU, tmp);
6151 }
6152
6153 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6154                                        bool enable)
6155 {
6156         u32 data, orig;
6157
6158         orig = data = RREG32(RLC_PG_CNTL);
6159         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6160                 data |= STATIC_PER_CU_PG_ENABLE;
6161         else
6162                 data &= ~STATIC_PER_CU_PG_ENABLE;
6163         if (orig != data)
6164                 WREG32(RLC_PG_CNTL, data);
6165 }
6166
6167 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6168                                         bool enable)
6169 {
6170         u32 data, orig;
6171
6172         orig = data = RREG32(RLC_PG_CNTL);
6173         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6174                 data |= DYN_PER_CU_PG_ENABLE;
6175         else
6176                 data &= ~DYN_PER_CU_PG_ENABLE;
6177         if (orig != data)
6178                 WREG32(RLC_PG_CNTL, data);
6179 }
6180
/* RLC_GPM_SCRATCH offsets written by cik_init_gfx_cgpg() below */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6183
/* One-time setup for GFX coarse-grain power gating: publish the clear
 * state descriptor and save/restore register list through the RLC
 * scratch interface, then program the PG source, delays and idle poll
 * parameters.  Writes to RLC_GPM_SCRATCH_DATA auto-advance from the
 * last RLC_GPM_SCRATCH_ADDR written, so the address/data ordering is
 * significant.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
        u32 data, orig;
        u32 i;

        if (rdev->rlc.cs_data) {
                /* clear state descriptor: hi addr, lo addr, size */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
        } else {
                /* no clear state: zero out the descriptor */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                for (i = 0; i < 3; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, 0);
        }
        if (rdev->rlc.reg_list) {
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
                for (i = 0; i < rdev->rlc.reg_list_size; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
        }

        orig = data = RREG32(RLC_PG_CNTL);
        data |= GFX_PG_SRC;
        if (orig != data)
                WREG32(RLC_PG_CNTL, data);

        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
        WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

        data = RREG32(CP_RB_WPTR_POLL_CNTL);
        data &= ~IDLE_POLL_COUNT_MASK;
        data |= IDLE_POLL_COUNT(0x60);
        WREG32(CP_RB_WPTR_POLL_CNTL, data);

        data = 0x10101010;
        WREG32(RLC_PG_DELAY, data);

        data = RREG32(RLC_PG_DELAY_2);
        data &= ~0xff;
        data |= 0x3;
        WREG32(RLC_PG_DELAY_2, data);

        data = RREG32(RLC_AUTO_PG_CTRL);
        data &= ~GRBM_REG_SGIT_MASK;
        data |= GRBM_REG_SGIT(0x700);
        WREG32(RLC_AUTO_PG_CTRL, data);

}
6232
/* Toggle all GFX power-gating features (CGPG, static and dynamic MGPG)
 * together.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
        cik_enable_gfx_cgpg(rdev, enable);
        cik_enable_gfx_static_mgpg(rdev, enable);
        cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6239
6240 u32 cik_get_csb_size(struct radeon_device *rdev)
6241 {
6242         u32 count = 0;
6243         const struct cs_section_def *sect = NULL;
6244         const struct cs_extent_def *ext = NULL;
6245
6246         if (rdev->rlc.cs_data == NULL)
6247                 return 0;
6248
6249         /* begin clear state */
6250         count += 2;
6251         /* context control state */
6252         count += 3;
6253
6254         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6255                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6256                         if (sect->id == SECT_CONTEXT)
6257                                 count += 2 + ext->reg_count;
6258                         else
6259                                 return 0;
6260                 }
6261         }
6262         /* pa_sc_raster_config/pa_sc_raster_config1 */
6263         count += 4;
6264         /* end clear state */
6265         count += 2;
6266         /* clear state */
6267         count += 2;
6268
6269         return count;
6270 }
6271
6272 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6273 {
6274         u32 count = 0, i;
6275         const struct cs_section_def *sect = NULL;
6276         const struct cs_extent_def *ext = NULL;
6277
6278         if (rdev->rlc.cs_data == NULL)
6279                 return;
6280         if (buffer == NULL)
6281                 return;
6282
6283         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6284         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6285
6286         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6287         buffer[count++] = cpu_to_le32(0x80000000);
6288         buffer[count++] = cpu_to_le32(0x80000000);
6289
6290         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6291                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6292                         if (sect->id == SECT_CONTEXT) {
6293                                 buffer[count++] =
6294                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6295                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6296                                 for (i = 0; i < ext->reg_count; i++)
6297                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6298                         } else {
6299                                 return;
6300                         }
6301                 }
6302         }
6303
6304         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6305         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6306         switch (rdev->family) {
6307         case CHIP_BONAIRE:
6308                 buffer[count++] = cpu_to_le32(0x16000012);
6309                 buffer[count++] = cpu_to_le32(0x00000000);
6310                 break;
6311         case CHIP_KAVERI:
6312                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6313                 buffer[count++] = cpu_to_le32(0x00000000);
6314                 break;
6315         case CHIP_KABINI:
6316                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6317                 buffer[count++] = cpu_to_le32(0x00000000);
6318                 break;
6319         case CHIP_HAWAII:
6320                 buffer[count++] = 0x3a00161a;
6321                 buffer[count++] = 0x0000002e;
6322                 break;
6323         default:
6324                 buffer[count++] = cpu_to_le32(0x00000000);
6325                 buffer[count++] = cpu_to_le32(0x00000000);
6326                 break;
6327         }
6328
6329         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6330         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6331
6332         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6333         buffer[count++] = cpu_to_le32(0);
6334 }
6335
/* Enable power gating for all features advertised in rdev->pg_flags;
 * GFX PG additionally requires the one-time cik_init_gfx_cgpg() setup.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_enable_sck_slowdown_on_pu(rdev, true);
                cik_enable_sck_slowdown_on_pd(rdev, true);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        cik_init_gfx_cgpg(rdev);
                        cik_enable_cp_pg(rdev, true);
                        cik_enable_gds_pg(rdev, true);
                }
                cik_init_ao_cu_mask(rdev);
                cik_update_gfx_pg(rdev, true);
        }
}
6350
/* Disable power gating; mirrors cik_init_pg() in reverse (GFX PG first,
 * then CP/GDS PG).
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_update_gfx_pg(rdev, false);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        cik_enable_cp_pg(rdev, false);
                        cik_enable_gds_pg(rdev, false);
                }
        }
}
6361
6362 /*
6363  * Interrupts
6364  * Starting with r6xx, interrupts are handled via a ring buffer.
6365  * Ring buffers are areas of GPU accessible memory that the GPU
6366  * writes interrupt vectors into and the host reads vectors out of.
6367  * There is a rptr (read pointer) that determines where the
6368  * host is currently reading, and a wptr (write pointer)
6369  * which determines where the GPU has written.  When the
6370  * pointers are equal, the ring is idle.  When the GPU
6371  * writes vectors to the ring buffer, it increments the
6372  * wptr.  When there is an interrupt, the host then starts
6373  * fetching commands and processing them until the pointers are
6374  * equal again at which point it updates the rptr.
6375  */
6376
6377 /**
6378  * cik_enable_interrupts - Enable the interrupt ring buffer
6379  *
6380  * @rdev: radeon_device pointer
6381  *
6382  * Enable the interrupt ring buffer (CIK).
6383  */
6384 static void cik_enable_interrupts(struct radeon_device *rdev)
6385 {
6386         u32 ih_cntl = RREG32(IH_CNTL);
6387         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6388
6389         ih_cntl |= ENABLE_INTR;
6390         ih_rb_cntl |= IH_RB_ENABLE;
6391         WREG32(IH_CNTL, ih_cntl);
6392         WREG32(IH_RB_CNTL, ih_rb_cntl);
6393         rdev->ih.enabled = true;
6394 }
6395
6396 /**
6397  * cik_disable_interrupts - Disable the interrupt ring buffer
6398  *
6399  * @rdev: radeon_device pointer
6400  *
6401  * Disable the interrupt ring buffer (CIK).
6402  */
6403 static void cik_disable_interrupts(struct radeon_device *rdev)
6404 {
6405         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6406         u32 ih_cntl = RREG32(IH_CNTL);
6407
6408         ih_rb_cntl &= ~IH_RB_ENABLE;
6409         ih_cntl &= ~ENABLE_INTR;
6410         WREG32(IH_RB_CNTL, ih_rb_cntl);
6411         WREG32(IH_CNTL, ih_cntl);
6412         /* set rptr, wptr to 0 */
6413         WREG32(IH_RB_RPTR, 0);
6414         WREG32(IH_RB_WPTR, 0);
6415         rdev->ih.enabled = false;
6416         rdev->ih.rptr = 0;
6417 }
6418
6419 /**
6420  * cik_disable_interrupt_state - Disable all interrupt sources
6421  *
6422  * @rdev: radeon_device pointer
6423  *
6424  * Clear all interrupt enable bits used by the driver (CIK).
6425  */
6426 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6427 {
6428         u32 tmp;
6429
6430         /* gfx ring */
6431         tmp = RREG32(CP_INT_CNTL_RING0) &
6432                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6433         WREG32(CP_INT_CNTL_RING0, tmp);
6434         /* sdma */
6435         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6436         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6437         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6438         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6439         /* compute queues */
6440         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6441         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6442         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6443         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6444         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6445         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6446         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6447         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6448         /* grbm */
6449         WREG32(GRBM_INT_CNTL, 0);
6450         /* vline/vblank, etc. */
6451         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6452         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6453         if (rdev->num_crtc >= 4) {
6454                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6455                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6456         }
6457         if (rdev->num_crtc >= 6) {
6458                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6459                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6460         }
6461
6462         /* dac hotplug */
6463         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6464
6465         /* digital hotplug */
6466         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6467         WREG32(DC_HPD1_INT_CONTROL, tmp);
6468         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6469         WREG32(DC_HPD2_INT_CONTROL, tmp);
6470         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6471         WREG32(DC_HPD3_INT_CONTROL, tmp);
6472         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6473         WREG32(DC_HPD4_INT_CONTROL, tmp);
6474         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6475         WREG32(DC_HPD5_INT_CONTROL, tmp);
6476         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6477         WREG32(DC_HPD6_INT_CONTROL, tmp);
6478
6479 }
6480
6481 /**
6482  * cik_irq_init - init and enable the interrupt ring
6483  *
6484  * @rdev: radeon_device pointer
6485  *
6486  * Allocate a ring buffer for the interrupt controller,
6487  * enable the RLC, disable interrupts, enable the IH
6488  * ring buffer and enable it (CIK).
6489  * Called at device load and reume.
6490  * Returns 0 for success, errors for failure.
6491  */
6492 static int cik_irq_init(struct radeon_device *rdev)
6493 {
6494         int ret = 0;
6495         int rb_bufsz;
6496         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6497
6498         /* allocate ring */
6499         ret = r600_ih_ring_alloc(rdev);
6500         if (ret)
6501                 return ret;
6502
6503         /* disable irqs */
6504         cik_disable_interrupts(rdev);
6505
6506         /* init rlc */
6507         ret = cik_rlc_resume(rdev);
6508         if (ret) {
6509                 r600_ih_ring_fini(rdev);
6510                 return ret;
6511         }
6512
6513         /* setup interrupt control */
6514         /* XXX this should actually be a bus address, not an MC address. same on older asics */
6515         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6516         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6517         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6518          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6519          */
6520         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6521         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6522         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6523         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6524
6525         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6526         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6527
6528         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6529                       IH_WPTR_OVERFLOW_CLEAR |
6530                       (rb_bufsz << 1));
6531
6532         if (rdev->wb.enabled)
6533                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6534
6535         /* set the writeback address whether it's enabled or not */
6536         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6537         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6538
6539         WREG32(IH_RB_CNTL, ih_rb_cntl);
6540
6541         /* set rptr, wptr to 0 */
6542         WREG32(IH_RB_RPTR, 0);
6543         WREG32(IH_RB_WPTR, 0);
6544
6545         /* Default settings for IH_CNTL (disabled at first) */
6546         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6547         /* RPTR_REARM only works if msi's are enabled */
6548         if (rdev->msi_enabled)
6549                 ih_cntl |= RPTR_REARM;
6550         WREG32(IH_CNTL, ih_cntl);
6551
6552         /* force the active interrupt state to all disabled */
6553         cik_disable_interrupt_state(rdev);
6554
6555         pci_set_master(rdev->pdev);
6556
6557         /* enable irqs */
6558         cik_enable_interrupts(rdev);
6559
6560         return ret;
6561 }
6562
6563 /**
6564  * cik_irq_set - enable/disable interrupt sources
6565  *
6566  * @rdev: radeon_device pointer
6567  *
6568  * Enable interrupt sources on the GPU (vblanks, hpd,
6569  * etc.) (CIK).
6570  * Returns 0 for success, errors for failure.
6571  */
6572 int cik_irq_set(struct radeon_device *rdev)
6573 {
6574         u32 cp_int_cntl;
6575         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6576         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6577         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6578         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6579         u32 grbm_int_cntl = 0;
6580         u32 dma_cntl, dma_cntl1;
6581         u32 thermal_int;
6582
6583         if (!rdev->irq.installed) {
6584                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6585                 return -EINVAL;
6586         }
6587         /* don't enable anything if the ih is disabled */
6588         if (!rdev->ih.enabled) {
6589                 cik_disable_interrupts(rdev);
6590                 /* force the active interrupt state to all disabled */
6591                 cik_disable_interrupt_state(rdev);
6592                 return 0;
6593         }
6594
6595         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6596                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6597         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6598
6599         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6600         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6601         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6602         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6603         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6604         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6605
6606         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6607         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6608
6609         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6610         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6611         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6612         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6613         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6614         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6615         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6616         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6617
6618         if (rdev->flags & RADEON_IS_IGP)
6619                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6620                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6621         else
6622                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6623                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6624
6625         /* enable CP interrupts on all rings */
6626         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6627                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6628                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6629         }
6630         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6631                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6632                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6633                 if (ring->me == 1) {
6634                         switch (ring->pipe) {
6635                         case 0:
6636                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6637                                 break;
6638                         case 1:
6639                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6640                                 break;
6641                         case 2:
6642                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6643                                 break;
6644                         case 3:
6645                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6646                                 break;
6647                         default:
6648                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6649                                 break;
6650                         }
6651                 } else if (ring->me == 2) {
6652                         switch (ring->pipe) {
6653                         case 0:
6654                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6655                                 break;
6656                         case 1:
6657                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6658                                 break;
6659                         case 2:
6660                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6661                                 break;
6662                         case 3:
6663                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6664                                 break;
6665                         default:
6666                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6667                                 break;
6668                         }
6669                 } else {
6670                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6671                 }
6672         }
6673         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6674                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6675                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6676                 if (ring->me == 1) {
6677                         switch (ring->pipe) {
6678                         case 0:
6679                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6680                                 break;
6681                         case 1:
6682                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6683                                 break;
6684                         case 2:
6685                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6686                                 break;
6687                         case 3:
6688                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6689                                 break;
6690                         default:
6691                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6692                                 break;
6693                         }
6694                 } else if (ring->me == 2) {
6695                         switch (ring->pipe) {
6696                         case 0:
6697                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6698                                 break;
6699                         case 1:
6700                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6701                                 break;
6702                         case 2:
6703                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6704                                 break;
6705                         case 3:
6706                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6707                                 break;
6708                         default:
6709                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6710                                 break;
6711                         }
6712                 } else {
6713                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6714                 }
6715         }
6716
6717         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6718                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6719                 dma_cntl |= TRAP_ENABLE;
6720         }
6721
6722         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6723                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6724                 dma_cntl1 |= TRAP_ENABLE;
6725         }
6726
6727         if (rdev->irq.crtc_vblank_int[0] ||
6728             atomic_read(&rdev->irq.pflip[0])) {
6729                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6730                 crtc1 |= VBLANK_INTERRUPT_MASK;
6731         }
6732         if (rdev->irq.crtc_vblank_int[1] ||
6733             atomic_read(&rdev->irq.pflip[1])) {
6734                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6735                 crtc2 |= VBLANK_INTERRUPT_MASK;
6736         }
6737         if (rdev->irq.crtc_vblank_int[2] ||
6738             atomic_read(&rdev->irq.pflip[2])) {
6739                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6740                 crtc3 |= VBLANK_INTERRUPT_MASK;
6741         }
6742         if (rdev->irq.crtc_vblank_int[3] ||
6743             atomic_read(&rdev->irq.pflip[3])) {
6744                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6745                 crtc4 |= VBLANK_INTERRUPT_MASK;
6746         }
6747         if (rdev->irq.crtc_vblank_int[4] ||
6748             atomic_read(&rdev->irq.pflip[4])) {
6749                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6750                 crtc5 |= VBLANK_INTERRUPT_MASK;
6751         }
6752         if (rdev->irq.crtc_vblank_int[5] ||
6753             atomic_read(&rdev->irq.pflip[5])) {
6754                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6755                 crtc6 |= VBLANK_INTERRUPT_MASK;
6756         }
6757         if (rdev->irq.hpd[0]) {
6758                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6759                 hpd1 |= DC_HPDx_INT_EN;
6760         }
6761         if (rdev->irq.hpd[1]) {
6762                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6763                 hpd2 |= DC_HPDx_INT_EN;
6764         }
6765         if (rdev->irq.hpd[2]) {
6766                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6767                 hpd3 |= DC_HPDx_INT_EN;
6768         }
6769         if (rdev->irq.hpd[3]) {
6770                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6771                 hpd4 |= DC_HPDx_INT_EN;
6772         }
6773         if (rdev->irq.hpd[4]) {
6774                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6775                 hpd5 |= DC_HPDx_INT_EN;
6776         }
6777         if (rdev->irq.hpd[5]) {
6778                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6779                 hpd6 |= DC_HPDx_INT_EN;
6780         }
6781
6782         if (rdev->irq.dpm_thermal) {
6783                 DRM_DEBUG("dpm thermal\n");
6784                 if (rdev->flags & RADEON_IS_IGP)
6785                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6786                 else
6787                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6788         }
6789
6790         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6791
6792         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6793         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6794
6795         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6796         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6797         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6798         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6799         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6800         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6801         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6802         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6803
6804         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6805
6806         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6807         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6808         if (rdev->num_crtc >= 4) {
6809                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6810                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6811         }
6812         if (rdev->num_crtc >= 6) {
6813                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6814                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6815         }
6816
6817         WREG32(DC_HPD1_INT_CONTROL, hpd1);
6818         WREG32(DC_HPD2_INT_CONTROL, hpd2);
6819         WREG32(DC_HPD3_INT_CONTROL, hpd3);
6820         WREG32(DC_HPD4_INT_CONTROL, hpd4);
6821         WREG32(DC_HPD5_INT_CONTROL, hpd5);
6822         WREG32(DC_HPD6_INT_CONTROL, hpd6);
6823
6824         if (rdev->flags & RADEON_IS_IGP)
6825                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6826         else
6827                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6828
6829         return 0;
6830 }
6831
6832 /**
6833  * cik_irq_ack - ack interrupt sources
6834  *
6835  * @rdev: radeon_device pointer
6836  *
6837  * Ack interrupt sources on the GPU (vblanks, hpd,
6838  * etc.) (CIK).  Certain interrupts sources are sw
6839  * generated and do not require an explicit ack.
6840  */
6841 static inline void cik_irq_ack(struct radeon_device *rdev)
6842 {
6843         u32 tmp;
6844
6845         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6846         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6847         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6848         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6849         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6850         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6851         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6852
6853         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6854                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6855         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6856                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6857         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6858                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6859         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6860                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6861
6862         if (rdev->num_crtc >= 4) {
6863                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6864                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6865                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6866                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6867                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6868                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6869                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6870                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6871         }
6872
6873         if (rdev->num_crtc >= 6) {
6874                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6875                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6876                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6877                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6878                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6879                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6880                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6881                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6882         }
6883
6884         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6885                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6886                 tmp |= DC_HPDx_INT_ACK;
6887                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6888         }
6889         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6890                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6891                 tmp |= DC_HPDx_INT_ACK;
6892                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6893         }
6894         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6895                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6896                 tmp |= DC_HPDx_INT_ACK;
6897                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6898         }
6899         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6900                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6901                 tmp |= DC_HPDx_INT_ACK;
6902                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6903         }
6904         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6905                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6906                 tmp |= DC_HPDx_INT_ACK;
6907                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6908         }
6909         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6910                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6911                 tmp |= DC_HPDx_INT_ACK;
6912                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6913         }
6914 }
6915
6916 /**
6917  * cik_irq_disable - disable interrupts
6918  *
6919  * @rdev: radeon_device pointer
6920  *
6921  * Disable interrupts on the hw (CIK).
6922  */
6923 static void cik_irq_disable(struct radeon_device *rdev)
6924 {
6925         cik_disable_interrupts(rdev);
6926         /* Wait and acknowledge irq */
6927         mdelay(1);
6928         cik_irq_ack(rdev);
6929         cik_disable_interrupt_state(rdev);
6930 }
6931
6932 /**
6933  * cik_irq_disable - disable interrupts for suspend
6934  *
6935  * @rdev: radeon_device pointer
6936  *
6937  * Disable interrupts and stop the RLC (CIK).
6938  * Used for suspend.
6939  */
6940 static void cik_irq_suspend(struct radeon_device *rdev)
6941 {
6942         cik_irq_disable(rdev);
6943         cik_rlc_stop(rdev);
6944 }
6945
6946 /**
6947  * cik_irq_fini - tear down interrupt support
6948  *
6949  * @rdev: radeon_device pointer
6950  *
6951  * Disable interrupts on the hw and free the IH ring
6952  * buffer (CIK).
6953  * Used for driver unload.
6954  */
6955 static void cik_irq_fini(struct radeon_device *rdev)
6956 {
6957         cik_irq_suspend(rdev);
6958         r600_ih_ring_fini(rdev);
6959 }
6960
6961 /**
6962  * cik_get_ih_wptr - get the IH ring buffer wptr
6963  *
6964  * @rdev: radeon_device pointer
6965  *
6966  * Get the IH ring buffer wptr from either the register
6967  * or the writeback memory buffer (CIK).  Also check for
6968  * ring buffer overflow and deal with it.
6969  * Used by cik_irq_process().
6970  * Returns the value of the wptr.
6971  */
6972 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6973 {
6974         u32 wptr, tmp;
6975
6976         if (rdev->wb.enabled)
6977                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6978         else
6979                 wptr = RREG32(IH_RB_WPTR);
6980
6981         if (wptr & RB_OVERFLOW) {
6982                 /* When a ring buffer overflow happen start parsing interrupt
6983                  * from the last not overwritten vector (wptr + 16). Hopefully
6984                  * this should allow us to catchup.
6985                  */
6986                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6987                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6988                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6989                 tmp = RREG32(IH_RB_CNTL);
6990                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6991                 WREG32(IH_RB_CNTL, tmp);
6992         }
6993         return (wptr & rdev->ih.ptr_mask);
6994 }
6995
6996 /*        CIK IV Ring
6997  * Each IV ring entry is 128 bits:
6998  * [7:0]    - interrupt source id
6999  * [31:8]   - reserved
7000  * [59:32]  - interrupt source data
7001  * [63:60]  - reserved
7002  * [71:64]  - RINGID
7003  *            CP:
7004  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7005  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7006  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7007  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7008  *            PIPE_ID - ME0 0=3D
7009  *                    - ME1&2 compute dispatcher (4 pipes each)
7010  *            SDMA:
7011  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7012  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7013  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7014  * [79:72]  - VMID
7015  * [95:80]  - PASID
7016  * [127:96] - reserved
7017  */
7018 /**
7019  * cik_irq_process - interrupt handler
7020  *
7021  * @rdev: radeon_device pointer
7022  *
7023  * Interrupt hander (CIK).  Walk the IH ring,
7024  * ack interrupts and schedule work to handle
7025  * interrupt events.
7026  * Returns irq process return code.
7027  */
7028 int cik_irq_process(struct radeon_device *rdev)
7029 {
7030         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7031         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7032         u32 wptr;
7033         u32 rptr;
7034         u32 src_id, src_data, ring_id;
7035         u8 me_id, pipe_id, queue_id;
7036         u32 ring_index;
7037         bool queue_hotplug = false;
7038         bool queue_reset = false;
7039         u32 addr, status, mc_client;
7040         bool queue_thermal = false;
7041
7042         if (!rdev->ih.enabled || rdev->shutdown)
7043                 return IRQ_NONE;
7044
7045         wptr = cik_get_ih_wptr(rdev);
7046
7047 restart_ih:
7048         /* is somebody else already processing irqs? */
7049         if (atomic_xchg(&rdev->ih.lock, 1))
7050                 return IRQ_NONE;
7051
7052         rptr = rdev->ih.rptr;
7053         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7054
7055         /* Order reading of wptr vs. reading of IH ring data */
7056         rmb();
7057
7058         /* display interrupts */
7059         cik_irq_ack(rdev);
7060
7061         while (rptr != wptr) {
7062                 /* wptr/rptr are in bytes! */
7063                 ring_index = rptr / 4;
7064                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7065                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7066                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7067
7068                 switch (src_id) {
7069                 case 1: /* D1 vblank/vline */
7070                         switch (src_data) {
7071                         case 0: /* D1 vblank */
7072                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7073                                         if (rdev->irq.crtc_vblank_int[0]) {
7074                                                 drm_handle_vblank(rdev->ddev, 0);
7075                                                 rdev->pm.vblank_sync = true;
7076                                                 wake_up(&rdev->irq.vblank_queue);
7077                                         }
7078                                         if (atomic_read(&rdev->irq.pflip[0]))
7079                                                 radeon_crtc_handle_flip(rdev, 0);
7080                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7081                                         DRM_DEBUG("IH: D1 vblank\n");
7082                                 }
7083                                 break;
7084                         case 1: /* D1 vline */
7085                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7086                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7087                                         DRM_DEBUG("IH: D1 vline\n");
7088                                 }
7089                                 break;
7090                         default:
7091                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7092                                 break;
7093                         }
7094                         break;
7095                 case 2: /* D2 vblank/vline */
7096                         switch (src_data) {
7097                         case 0: /* D2 vblank */
7098                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7099                                         if (rdev->irq.crtc_vblank_int[1]) {
7100                                                 drm_handle_vblank(rdev->ddev, 1);
7101                                                 rdev->pm.vblank_sync = true;
7102                                                 wake_up(&rdev->irq.vblank_queue);
7103                                         }
7104                                         if (atomic_read(&rdev->irq.pflip[1]))
7105                                                 radeon_crtc_handle_flip(rdev, 1);
7106                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7107                                         DRM_DEBUG("IH: D2 vblank\n");
7108                                 }
7109                                 break;
7110                         case 1: /* D2 vline */
7111                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7112                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7113                                         DRM_DEBUG("IH: D2 vline\n");
7114                                 }
7115                                 break;
7116                         default:
7117                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7118                                 break;
7119                         }
7120                         break;
7121                 case 3: /* D3 vblank/vline */
7122                         switch (src_data) {
7123                         case 0: /* D3 vblank */
7124                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7125                                         if (rdev->irq.crtc_vblank_int[2]) {
7126                                                 drm_handle_vblank(rdev->ddev, 2);
7127                                                 rdev->pm.vblank_sync = true;
7128                                                 wake_up(&rdev->irq.vblank_queue);
7129                                         }
7130                                         if (atomic_read(&rdev->irq.pflip[2]))
7131                                                 radeon_crtc_handle_flip(rdev, 2);
7132                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7133                                         DRM_DEBUG("IH: D3 vblank\n");
7134                                 }
7135                                 break;
7136                         case 1: /* D3 vline */
7137                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7138                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7139                                         DRM_DEBUG("IH: D3 vline\n");
7140                                 }
7141                                 break;
7142                         default:
7143                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7144                                 break;
7145                         }
7146                         break;
7147                 case 4: /* D4 vblank/vline */
7148                         switch (src_data) {
7149                         case 0: /* D4 vblank */
7150                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7151                                         if (rdev->irq.crtc_vblank_int[3]) {
7152                                                 drm_handle_vblank(rdev->ddev, 3);
7153                                                 rdev->pm.vblank_sync = true;
7154                                                 wake_up(&rdev->irq.vblank_queue);
7155                                         }
7156                                         if (atomic_read(&rdev->irq.pflip[3]))
7157                                                 radeon_crtc_handle_flip(rdev, 3);
7158                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7159                                         DRM_DEBUG("IH: D4 vblank\n");
7160                                 }
7161                                 break;
7162                         case 1: /* D4 vline */
7163                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7164                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7165                                         DRM_DEBUG("IH: D4 vline\n");
7166                                 }
7167                                 break;
7168                         default:
7169                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7170                                 break;
7171                         }
7172                         break;
7173                 case 5: /* D5 vblank/vline */
7174                         switch (src_data) {
7175                         case 0: /* D5 vblank */
7176                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7177                                         if (rdev->irq.crtc_vblank_int[4]) {
7178                                                 drm_handle_vblank(rdev->ddev, 4);
7179                                                 rdev->pm.vblank_sync = true;
7180                                                 wake_up(&rdev->irq.vblank_queue);
7181                                         }
7182                                         if (atomic_read(&rdev->irq.pflip[4]))
7183                                                 radeon_crtc_handle_flip(rdev, 4);
7184                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7185                                         DRM_DEBUG("IH: D5 vblank\n");
7186                                 }
7187                                 break;
7188                         case 1: /* D5 vline */
7189                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7190                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7191                                         DRM_DEBUG("IH: D5 vline\n");
7192                                 }
7193                                 break;
7194                         default:
7195                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7196                                 break;
7197                         }
7198                         break;
7199                 case 6: /* D6 vblank/vline */
7200                         switch (src_data) {
7201                         case 0: /* D6 vblank */
7202                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7203                                         if (rdev->irq.crtc_vblank_int[5]) {
7204                                                 drm_handle_vblank(rdev->ddev, 5);
7205                                                 rdev->pm.vblank_sync = true;
7206                                                 wake_up(&rdev->irq.vblank_queue);
7207                                         }
7208                                         if (atomic_read(&rdev->irq.pflip[5]))
7209                                                 radeon_crtc_handle_flip(rdev, 5);
7210                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7211                                         DRM_DEBUG("IH: D6 vblank\n");
7212                                 }
7213                                 break;
7214                         case 1: /* D6 vline */
7215                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7216                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7217                                         DRM_DEBUG("IH: D6 vline\n");
7218                                 }
7219                                 break;
7220                         default:
7221                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7222                                 break;
7223                         }
7224                         break;
7225                 case 42: /* HPD hotplug */
7226                         switch (src_data) {
7227                         case 0:
7228                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7229                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7230                                         queue_hotplug = true;
7231                                         DRM_DEBUG("IH: HPD1\n");
7232                                 }
7233                                 break;
7234                         case 1:
7235                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7236                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7237                                         queue_hotplug = true;
7238                                         DRM_DEBUG("IH: HPD2\n");
7239                                 }
7240                                 break;
7241                         case 2:
7242                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7243                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7244                                         queue_hotplug = true;
7245                                         DRM_DEBUG("IH: HPD3\n");
7246                                 }
7247                                 break;
7248                         case 3:
7249                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7250                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7251                                         queue_hotplug = true;
7252                                         DRM_DEBUG("IH: HPD4\n");
7253                                 }
7254                                 break;
7255                         case 4:
7256                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7257                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7258                                         queue_hotplug = true;
7259                                         DRM_DEBUG("IH: HPD5\n");
7260                                 }
7261                                 break;
7262                         case 5:
7263                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7264                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7265                                         queue_hotplug = true;
7266                                         DRM_DEBUG("IH: HPD6\n");
7267                                 }
7268                                 break;
7269                         default:
7270                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7271                                 break;
7272                         }
7273                         break;
7274                 case 124: /* UVD */
7275                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7276                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7277                         break;
7278                 case 146:
7279                 case 147:
7280                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7281                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7282                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7283                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7284                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7285                                 addr);
7286                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7287                                 status);
7288                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7289                         /* reset addr and status */
7290                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7291                         break;
7292                 case 176: /* GFX RB CP_INT */
7293                 case 177: /* GFX IB CP_INT */
7294                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7295                         break;
7296                 case 181: /* CP EOP event */
7297                         DRM_DEBUG("IH: CP EOP\n");
7298                         /* XXX check the bitfield order! */
7299                         me_id = (ring_id & 0x60) >> 5;
7300                         pipe_id = (ring_id & 0x18) >> 3;
7301                         queue_id = (ring_id & 0x7) >> 0;
7302                         switch (me_id) {
7303                         case 0:
7304                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7305                                 break;
7306                         case 1:
7307                         case 2:
7308                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7309                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7310                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7311                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7312                                 break;
7313                         }
7314                         break;
7315                 case 184: /* CP Privileged reg access */
7316                         DRM_ERROR("Illegal register access in command stream\n");
7317                         /* XXX check the bitfield order! */
7318                         me_id = (ring_id & 0x60) >> 5;
7319                         pipe_id = (ring_id & 0x18) >> 3;
7320                         queue_id = (ring_id & 0x7) >> 0;
7321                         switch (me_id) {
7322                         case 0:
7323                                 /* This results in a full GPU reset, but all we need to do is soft
7324                                  * reset the CP for gfx
7325                                  */
7326                                 queue_reset = true;
7327                                 break;
7328                         case 1:
7329                                 /* XXX compute */
7330                                 queue_reset = true;
7331                                 break;
7332                         case 2:
7333                                 /* XXX compute */
7334                                 queue_reset = true;
7335                                 break;
7336                         }
7337                         break;
7338                 case 185: /* CP Privileged inst */
7339                         DRM_ERROR("Illegal instruction in command stream\n");
7340                         /* XXX check the bitfield order! */
7341                         me_id = (ring_id & 0x60) >> 5;
7342                         pipe_id = (ring_id & 0x18) >> 3;
7343                         queue_id = (ring_id & 0x7) >> 0;
7344                         switch (me_id) {
7345                         case 0:
7346                                 /* This results in a full GPU reset, but all we need to do is soft
7347                                  * reset the CP for gfx
7348                                  */
7349                                 queue_reset = true;
7350                                 break;
7351                         case 1:
7352                                 /* XXX compute */
7353                                 queue_reset = true;
7354                                 break;
7355                         case 2:
7356                                 /* XXX compute */
7357                                 queue_reset = true;
7358                                 break;
7359                         }
7360                         break;
7361                 case 224: /* SDMA trap event */
7362                         /* XXX check the bitfield order! */
7363                         me_id = (ring_id & 0x3) >> 0;
7364                         queue_id = (ring_id & 0xc) >> 2;
7365                         DRM_DEBUG("IH: SDMA trap\n");
7366                         switch (me_id) {
7367                         case 0:
7368                                 switch (queue_id) {
7369                                 case 0:
7370                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7371                                         break;
7372                                 case 1:
7373                                         /* XXX compute */
7374                                         break;
7375                                 case 2:
7376                                         /* XXX compute */
7377                                         break;
7378                                 }
7379                                 break;
7380                         case 1:
7381                                 switch (queue_id) {
7382                                 case 0:
7383                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7384                                         break;
7385                                 case 1:
7386                                         /* XXX compute */
7387                                         break;
7388                                 case 2:
7389                                         /* XXX compute */
7390                                         break;
7391                                 }
7392                                 break;
7393                         }
7394                         break;
7395                 case 230: /* thermal low to high */
7396                         DRM_DEBUG("IH: thermal low to high\n");
7397                         rdev->pm.dpm.thermal.high_to_low = false;
7398                         queue_thermal = true;
7399                         break;
7400                 case 231: /* thermal high to low */
7401                         DRM_DEBUG("IH: thermal high to low\n");
7402                         rdev->pm.dpm.thermal.high_to_low = true;
7403                         queue_thermal = true;
7404                         break;
7405                 case 233: /* GUI IDLE */
7406                         DRM_DEBUG("IH: GUI idle\n");
7407                         break;
7408                 case 241: /* SDMA Privileged inst */
7409                 case 247: /* SDMA Privileged inst */
7410                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7411                         /* XXX check the bitfield order! */
7412                         me_id = (ring_id & 0x3) >> 0;
7413                         queue_id = (ring_id & 0xc) >> 2;
7414                         switch (me_id) {
7415                         case 0:
7416                                 switch (queue_id) {
7417                                 case 0:
7418                                         queue_reset = true;
7419                                         break;
7420                                 case 1:
7421                                         /* XXX compute */
7422                                         queue_reset = true;
7423                                         break;
7424                                 case 2:
7425                                         /* XXX compute */
7426                                         queue_reset = true;
7427                                         break;
7428                                 }
7429                                 break;
7430                         case 1:
7431                                 switch (queue_id) {
7432                                 case 0:
7433                                         queue_reset = true;
7434                                         break;
7435                                 case 1:
7436                                         /* XXX compute */
7437                                         queue_reset = true;
7438                                         break;
7439                                 case 2:
7440                                         /* XXX compute */
7441                                         queue_reset = true;
7442                                         break;
7443                                 }
7444                                 break;
7445                         }
7446                         break;
7447                 default:
7448                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7449                         break;
7450                 }
7451
7452                 /* wptr/rptr are in bytes! */
7453                 rptr += 16;
7454                 rptr &= rdev->ih.ptr_mask;
7455         }
7456         if (queue_hotplug)
7457                 schedule_work(&rdev->hotplug_work);
7458         if (queue_reset)
7459                 schedule_work(&rdev->reset_work);
7460         if (queue_thermal)
7461                 schedule_work(&rdev->pm.dpm.thermal.work);
7462         rdev->ih.rptr = rptr;
7463         WREG32(IH_RB_RPTR, rdev->ih.rptr);
7464         atomic_set(&rdev->ih.lock, 0);
7465
7466         /* make sure wptr hasn't changed while processing */
7467         wptr = cik_get_ih_wptr(rdev);
7468         if (wptr != rptr)
7469                 goto restart_ih;
7470
7471         return IRQ_HANDLED;
7472 }
7473
7474 /*
7475  * startup/shutdown callbacks
7476  */
7477 /**
7478  * cik_startup - program the asic to a functional state
7479  *
7480  * @rdev: radeon_device pointer
7481  *
7482  * Programs the asic to a functional state (CIK).
7483  * Called by cik_init() and cik_resume().
7484  * Returns 0 for success, error for failure.
7485  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	/* program the memory controller before anything touches VRAM */
	cik_mc_program(rdev);

	/* Fetch microcode if not already loaded.  APUs (IGP) have no
	 * discrete memory controller firmware; dGPUs additionally need
	 * mc_fw and must have it uploaded via ci_mc_load_microcode().
	 */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	/* GART must be up before the rings/IB pool are initialized */
	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the RLC save/restore list matching the APU family */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on each ring: GFX, both compute (CP1/CP2),
	 * both SDMA engines, then UVD below.
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD bring-up is best-effort: on any failure the UVD ring size is
	 * zeroed so the ring-init block below skips it instead of failing
	 * the whole startup.
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	/* SDMA engine 0 and 1 gfx rings; same registers at different offsets */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD ring only if the resume path above succeeded (ring_size != 0);
	 * failure here is logged but does not abort startup.
	 */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7711
7712 /**
7713  * cik_resume - resume the asic to a functional state
7714  *
7715  * @rdev: radeon_device pointer
7716  *
7717  * Programs the asic to a functional state (CIK).
7718  * Called at resume.
7719  * Returns 0 for success, error for failure.
7720  */
7721 int cik_resume(struct radeon_device *rdev)
7722 {
7723         int r;
7724
7725         /* post card */
7726         atom_asic_init(rdev->mode_info.atom_context);
7727
7728         /* init golden registers */
7729         cik_init_golden_registers(rdev);
7730
7731         rdev->accel_working = true;
7732         r = cik_startup(rdev);
7733         if (r) {
7734                 DRM_ERROR("cik startup failed on resume\n");
7735                 rdev->accel_working = false;
7736                 return r;
7737         }
7738
7739         return r;
7740
7741 }
7742
7743 /**
7744  * cik_suspend - suspend the asic
7745  *
7746  * @rdev: radeon_device pointer
7747  *
7748  * Bring the chip into a state suitable for suspend (CIK).
7749  * Called at suspend.
7750  * Returns 0 for success.
7751  */
7752 int cik_suspend(struct radeon_device *rdev)
7753 {
7754         dce6_audio_fini(rdev);
7755         radeon_vm_manager_fini(rdev);
7756         cik_cp_enable(rdev, false);
7757         cik_sdma_enable(rdev, false);
7758         uvd_v1_0_fini(rdev);
7759         radeon_uvd_suspend(rdev);
7760         cik_fini_pg(rdev);
7761         cik_fini_cg(rdev);
7762         cik_irq_suspend(rdev);
7763         radeon_wb_disable(rdev);
7764         cik_pcie_gart_disable(rdev);
7765         return 0;
7766 }
7767
7768 /* Plan is to move initialization in that function and use
7769  * helper function so that radeon_device_init pretty much
7770  * do nothing more than calling asic specific function. This
7771  * should also allow to remove a bunch of callback function
7772  * like vram_info.
7773  */
7774 /**
7775  * cik_init - asic specific driver and hw init
7776  *
7777  * @rdev: radeon_device pointer
7778  *
7779  * Setup asic specific driver variables and program the hw
7780  * to a functional state (CIK).
7781  * Called at driver startup.
7782  * Returns 0 for success, errors for failure.
7783  */
7784 int cik_init(struct radeon_device *rdev)
7785 {
7786         struct radeon_ring *ring;
7787         int r;
7788
7789         /* Read BIOS */
7790         if (!radeon_get_bios(rdev)) {
7791                 if (ASIC_IS_AVIVO(rdev))
7792                         return -EINVAL;
7793         }
7794         /* Must be an ATOMBIOS */
7795         if (!rdev->is_atom_bios) {
7796                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7797                 return -EINVAL;
7798         }
7799         r = radeon_atombios_init(rdev);
7800         if (r)
7801                 return r;
7802
7803         /* Post card if necessary */
7804         if (!radeon_card_posted(rdev)) {
7805                 if (!rdev->bios) {
7806                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7807                         return -EINVAL;
7808                 }
7809                 DRM_INFO("GPU not posted. posting now...\n");
7810                 atom_asic_init(rdev->mode_info.atom_context);
7811         }
7812         /* init golden registers */
7813         cik_init_golden_registers(rdev);
7814         /* Initialize scratch registers */
7815         cik_scratch_init(rdev);
7816         /* Initialize surface registers */
7817         radeon_surface_init(rdev);
7818         /* Initialize clocks */
7819         radeon_get_clock_info(rdev->ddev);
7820
7821         /* Fence driver */
7822         r = radeon_fence_driver_init(rdev);
7823         if (r)
7824                 return r;
7825
7826         /* initialize memory controller */
7827         r = cik_mc_init(rdev);
7828         if (r)
7829                 return r;
7830         /* Memory manager */
7831         r = radeon_bo_init(rdev);
7832         if (r)
7833                 return r;
7834
7835         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7836         ring->ring_obj = NULL;
7837         r600_ring_init(rdev, ring, 1024 * 1024);
7838
7839         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7840         ring->ring_obj = NULL;
7841         r600_ring_init(rdev, ring, 1024 * 1024);
7842         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7843         if (r)
7844                 return r;
7845
7846         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7847         ring->ring_obj = NULL;
7848         r600_ring_init(rdev, ring, 1024 * 1024);
7849         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7850         if (r)
7851                 return r;
7852
7853         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7854         ring->ring_obj = NULL;
7855         r600_ring_init(rdev, ring, 256 * 1024);
7856
7857         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7858         ring->ring_obj = NULL;
7859         r600_ring_init(rdev, ring, 256 * 1024);
7860
7861         r = radeon_uvd_init(rdev);
7862         if (!r) {
7863                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7864                 ring->ring_obj = NULL;
7865                 r600_ring_init(rdev, ring, 4096);
7866         }
7867
7868         rdev->ih.ring_obj = NULL;
7869         r600_ih_ring_init(rdev, 64 * 1024);
7870
7871         r = r600_pcie_gart_init(rdev);
7872         if (r)
7873                 return r;
7874
7875         rdev->accel_working = true;
7876         r = cik_startup(rdev);
7877         if (r) {
7878                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7879                 cik_cp_fini(rdev);
7880                 cik_sdma_fini(rdev);
7881                 cik_irq_fini(rdev);
7882                 sumo_rlc_fini(rdev);
7883                 cik_mec_fini(rdev);
7884                 radeon_wb_fini(rdev);
7885                 radeon_ib_pool_fini(rdev);
7886                 radeon_vm_manager_fini(rdev);
7887                 radeon_irq_kms_fini(rdev);
7888                 cik_pcie_gart_fini(rdev);
7889                 rdev->accel_working = false;
7890         }
7891
7892         /* Don't start up if the MC ucode is missing.
7893          * The default clocks and voltages before the MC ucode
7894          * is loaded are not suffient for advanced operations.
7895          */
7896         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7897                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7898                 return -EINVAL;
7899         }
7900
7901         return 0;
7902 }
7903
7904 /**
7905  * cik_fini - asic specific driver and hw fini
7906  *
7907  * @rdev: radeon_device pointer
7908  *
7909  * Tear down the asic specific driver variables and program the hw
7910  * to an idle state (CIK).
7911  * Called at driver unload.
7912  */
7913 void cik_fini(struct radeon_device *rdev)
7914 {
7915         cik_cp_fini(rdev);
7916         cik_sdma_fini(rdev);
7917         cik_fini_pg(rdev);
7918         cik_fini_cg(rdev);
7919         cik_irq_fini(rdev);
7920         sumo_rlc_fini(rdev);
7921         cik_mec_fini(rdev);
7922         radeon_wb_fini(rdev);
7923         radeon_vm_manager_fini(rdev);
7924         radeon_ib_pool_fini(rdev);
7925         radeon_irq_kms_fini(rdev);
7926         uvd_v1_0_fini(rdev);
7927         radeon_uvd_fini(rdev);
7928         cik_pcie_gart_fini(rdev);
7929         r600_vram_scratch_fini(rdev);
7930         radeon_gem_fini(rdev);
7931         radeon_fence_driver_fini(rdev);
7932         radeon_bo_fini(rdev);
7933         radeon_atombios_fini(rdev);
7934         kfree(rdev->bios);
7935         rdev->bios = NULL;
7936 }
7937
7938 void dce8_program_fmt(struct drm_encoder *encoder)
7939 {
7940         struct drm_device *dev = encoder->dev;
7941         struct radeon_device *rdev = dev->dev_private;
7942         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
7943         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
7944         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
7945         int bpc = 0;
7946         u32 tmp = 0;
7947         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
7948
7949         if (connector) {
7950                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
7951                 bpc = radeon_get_monitor_bpc(connector);
7952                 dither = radeon_connector->dither;
7953         }
7954
7955         /* LVDS/eDP FMT is set up by atom */
7956         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
7957                 return;
7958
7959         /* not needed for analog */
7960         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
7961             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
7962                 return;
7963
7964         if (bpc == 0)
7965                 return;
7966
7967         switch (bpc) {
7968         case 6:
7969                 if (dither == RADEON_FMT_DITHER_ENABLE)
7970                         /* XXX sort out optimal dither settings */
7971                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7972                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
7973                 else
7974                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
7975                 break;
7976         case 8:
7977                 if (dither == RADEON_FMT_DITHER_ENABLE)
7978                         /* XXX sort out optimal dither settings */
7979                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7980                                 FMT_RGB_RANDOM_ENABLE |
7981                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
7982                 else
7983                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
7984                 break;
7985         case 10:
7986                 if (dither == RADEON_FMT_DITHER_ENABLE)
7987                         /* XXX sort out optimal dither settings */
7988                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7989                                 FMT_RGB_RANDOM_ENABLE |
7990                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
7991                 else
7992                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
7993                 break;
7994         default:
7995                 /* not needed */
7996                 break;
7997         }
7998
7999         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8000 }
8001
8002 /* display watermark setup */
8003 /**
8004  * dce8_line_buffer_adjust - Set up the line buffer
8005  *
8006  * @rdev: radeon_device pointer
8007  * @radeon_crtc: the selected display controller
8008  * @mode: the current display mode on the selected display
8009  * controller
8010  *
8011  * Setup up the line buffer allocation for
8012  * the selected display controller (CIK).
8013  * Returns the line buffer size in pixels.
8014  */
8015 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8016                                    struct radeon_crtc *radeon_crtc,
8017                                    struct drm_display_mode *mode)
8018 {
8019         u32 tmp, buffer_alloc, i;
8020         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8021         /*
8022          * Line Buffer Setup
8023          * There are 6 line buffers, one for each display controllers.
8024          * There are 3 partitions per LB. Select the number of partitions
8025          * to enable based on the display width.  For display widths larger
8026          * than 4096, you need use to use 2 display controllers and combine
8027          * them using the stereo blender.
8028          */
8029         if (radeon_crtc->base.enabled && mode) {
8030                 if (mode->crtc_hdisplay < 1920) {
8031                         tmp = 1;
8032                         buffer_alloc = 2;
8033                 } else if (mode->crtc_hdisplay < 2560) {
8034                         tmp = 2;
8035                         buffer_alloc = 2;
8036                 } else if (mode->crtc_hdisplay < 4096) {
8037                         tmp = 0;
8038                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8039                 } else {
8040                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8041                         tmp = 0;
8042                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8043                 }
8044         } else {
8045                 tmp = 1;
8046                 buffer_alloc = 0;
8047         }
8048
8049         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8050                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8051
8052         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8053                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8054         for (i = 0; i < rdev->usec_timeout; i++) {
8055                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8056                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8057                         break;
8058                 udelay(1);
8059         }
8060
8061         if (radeon_crtc->base.enabled && mode) {
8062                 switch (tmp) {
8063                 case 0:
8064                 default:
8065                         return 4096 * 2;
8066                 case 1:
8067                         return 1920 * 2;
8068                 case 2:
8069                         return 2560 * 2;
8070                 }
8071         }
8072
8073         /* controller not enabled, so no lb used */
8074         return 0;
8075 }
8076
8077 /**
8078  * cik_get_number_of_dram_channels - get the number of dram channels
8079  *
8080  * @rdev: radeon_device pointer
8081  *
8082  * Look up the number of video ram channels (CIK).
8083  * Used for display watermark bandwidth calculations
8084  * Returns the number of dram channels
8085  */
8086 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8087 {
8088         u32 tmp = RREG32(MC_SHARED_CHMAP);
8089
8090         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8091         case 0:
8092         default:
8093                 return 1;
8094         case 1:
8095                 return 2;
8096         case 2:
8097                 return 4;
8098         case 3:
8099                 return 8;
8100         case 4:
8101                 return 3;
8102         case 5:
8103                 return 6;
8104         case 6:
8105                 return 10;
8106         case 7:
8107                 return 12;
8108         case 8:
8109                 return 16;
8110         }
8111 }
8112
/* inputs to the dce8_* display watermark calculations below */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8128
8129 /**
8130  * dce8_dram_bandwidth - get the dram bandwidth
8131  *
8132  * @wm: watermark calculation data
8133  *
8134  * Calculate the raw dram bandwidth (CIK).
8135  * Used for display watermark bandwidth calculations
8136  * Returns the dram bandwidth in MBytes/s
8137  */
8138 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8139 {
8140         /* Calculate raw DRAM Bandwidth */
8141         fixed20_12 dram_efficiency; /* 0.7 */
8142         fixed20_12 yclk, dram_channels, bandwidth;
8143         fixed20_12 a;
8144
8145         a.full = dfixed_const(1000);
8146         yclk.full = dfixed_const(wm->yclk);
8147         yclk.full = dfixed_div(yclk, a);
8148         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8149         a.full = dfixed_const(10);
8150         dram_efficiency.full = dfixed_const(7);
8151         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8152         bandwidth.full = dfixed_mul(dram_channels, yclk);
8153         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8154
8155         return dfixed_trunc(bandwidth);
8156 }
8157
8158 /**
8159  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8160  *
8161  * @wm: watermark calculation data
8162  *
8163  * Calculate the dram bandwidth used for display (CIK).
8164  * Used for display watermark bandwidth calculations
8165  * Returns the dram bandwidth for display in MBytes/s
8166  */
8167 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8168 {
8169         /* Calculate DRAM Bandwidth and the part allocated to display. */
8170         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8171         fixed20_12 yclk, dram_channels, bandwidth;
8172         fixed20_12 a;
8173
8174         a.full = dfixed_const(1000);
8175         yclk.full = dfixed_const(wm->yclk);
8176         yclk.full = dfixed_div(yclk, a);
8177         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8178         a.full = dfixed_const(10);
8179         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8180         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8181         bandwidth.full = dfixed_mul(dram_channels, yclk);
8182         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8183
8184         return dfixed_trunc(bandwidth);
8185 }
8186
8187 /**
8188  * dce8_data_return_bandwidth - get the data return bandwidth
8189  *
8190  * @wm: watermark calculation data
8191  *
8192  * Calculate the data return bandwidth used for display (CIK).
8193  * Used for display watermark bandwidth calculations
8194  * Returns the data return bandwidth in MBytes/s
8195  */
8196 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8197 {
8198         /* Calculate the display Data return Bandwidth */
8199         fixed20_12 return_efficiency; /* 0.8 */
8200         fixed20_12 sclk, bandwidth;
8201         fixed20_12 a;
8202
8203         a.full = dfixed_const(1000);
8204         sclk.full = dfixed_const(wm->sclk);
8205         sclk.full = dfixed_div(sclk, a);
8206         a.full = dfixed_const(10);
8207         return_efficiency.full = dfixed_const(8);
8208         return_efficiency.full = dfixed_div(return_efficiency, a);
8209         a.full = dfixed_const(32);
8210         bandwidth.full = dfixed_mul(a, sclk);
8211         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8212
8213         return dfixed_trunc(bandwidth);
8214 }
8215
8216 /**
8217  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8218  *
8219  * @wm: watermark calculation data
8220  *
8221  * Calculate the dmif bandwidth used for display (CIK).
8222  * Used for display watermark bandwidth calculations
8223  * Returns the dmif bandwidth in MBytes/s
8224  */
8225 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8226 {
8227         /* Calculate the DMIF Request Bandwidth */
8228         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8229         fixed20_12 disp_clk, bandwidth;
8230         fixed20_12 a, b;
8231
8232         a.full = dfixed_const(1000);
8233         disp_clk.full = dfixed_const(wm->disp_clk);
8234         disp_clk.full = dfixed_div(disp_clk, a);
8235         a.full = dfixed_const(32);
8236         b.full = dfixed_mul(a, disp_clk);
8237
8238         a.full = dfixed_const(10);
8239         disp_clk_request_efficiency.full = dfixed_const(8);
8240         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8241
8242         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8243
8244         return dfixed_trunc(bandwidth);
8245 }
8246
8247 /**
8248  * dce8_available_bandwidth - get the min available bandwidth
8249  *
8250  * @wm: watermark calculation data
8251  *
8252  * Calculate the min available bandwidth used for display (CIK).
8253  * Used for display watermark bandwidth calculations
8254  * Returns the min available bandwidth in MBytes/s
8255  */
8256 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8257 {
8258         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8259         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8260         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8261         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8262
8263         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8264 }
8265
8266 /**
8267  * dce8_average_bandwidth - get the average available bandwidth
8268  *
8269  * @wm: watermark calculation data
8270  *
8271  * Calculate the average available bandwidth used for display (CIK).
8272  * Used for display watermark bandwidth calculations
8273  * Returns the average available bandwidth in MBytes/s
8274  */
8275 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8276 {
8277         /* Calculate the display mode Average Bandwidth
8278          * DisplayMode should contain the source and destination dimensions,
8279          * timing, etc.
8280          */
8281         fixed20_12 bpp;
8282         fixed20_12 line_time;
8283         fixed20_12 src_width;
8284         fixed20_12 bandwidth;
8285         fixed20_12 a;
8286
8287         a.full = dfixed_const(1000);
8288         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8289         line_time.full = dfixed_div(line_time, a);
8290         bpp.full = dfixed_const(wm->bytes_per_pixel);
8291         src_width.full = dfixed_const(wm->src_width);
8292         bandwidth.full = dfixed_mul(src_width, bpp);
8293         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8294         bandwidth.full = dfixed_div(bandwidth, line_time);
8295
8296         return dfixed_trunc(bandwidth);
8297 }
8298
8299 /**
8300  * dce8_latency_watermark - get the latency watermark
8301  *
8302  * @wm: watermark calculation data
8303  *
8304  * Calculate the latency watermark (CIK).
8305  * Used for display watermark bandwidth calculations
8306  * Returns the latency watermark in ns
8307  */
8308 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8309 {
8310         /* First calculate the latency in ns */
8311         u32 mc_latency = 2000; /* 2000 ns. */
8312         u32 available_bandwidth = dce8_available_bandwidth(wm);
8313         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8314         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8315         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8316         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8317                 (wm->num_heads * cursor_line_pair_return_time);
8318         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8319         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8320         u32 tmp, dmif_size = 12288;
8321         fixed20_12 a, b, c;
8322
8323         if (wm->num_heads == 0)
8324                 return 0;
8325
8326         a.full = dfixed_const(2);
8327         b.full = dfixed_const(1);
8328         if ((wm->vsc.full > a.full) ||
8329             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8330             (wm->vtaps >= 5) ||
8331             ((wm->vsc.full >= a.full) && wm->interlaced))
8332                 max_src_lines_per_dst_line = 4;
8333         else
8334                 max_src_lines_per_dst_line = 2;
8335
8336         a.full = dfixed_const(available_bandwidth);
8337         b.full = dfixed_const(wm->num_heads);
8338         a.full = dfixed_div(a, b);
8339
8340         b.full = dfixed_const(mc_latency + 512);
8341         c.full = dfixed_const(wm->disp_clk);
8342         b.full = dfixed_div(b, c);
8343
8344         c.full = dfixed_const(dmif_size);
8345         b.full = dfixed_div(c, b);
8346
8347         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8348
8349         b.full = dfixed_const(1000);
8350         c.full = dfixed_const(wm->disp_clk);
8351         b.full = dfixed_div(c, b);
8352         c.full = dfixed_const(wm->bytes_per_pixel);
8353         b.full = dfixed_mul(b, c);
8354
8355         lb_fill_bw = min(tmp, dfixed_trunc(b));
8356
8357         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8358         b.full = dfixed_const(1000);
8359         c.full = dfixed_const(lb_fill_bw);
8360         b.full = dfixed_div(c, b);
8361         a.full = dfixed_div(a, b);
8362         line_fill_time = dfixed_trunc(a);
8363
8364         if (line_fill_time < wm->active_time)
8365                 return latency;
8366         else
8367                 return latency + (line_fill_time - wm->active_time);
8368
8369 }
8370
8371 /**
8372  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8373  * average and available dram bandwidth
8374  *
8375  * @wm: watermark calculation data
8376  *
8377  * Check if the display average bandwidth fits in the display
8378  * dram bandwidth (CIK).
8379  * Used for display watermark bandwidth calculations
8380  * Returns true if the display fits, false if not.
8381  */
8382 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8383 {
8384         if (dce8_average_bandwidth(wm) <=
8385             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8386                 return true;
8387         else
8388                 return false;
8389 }
8390
8391 /**
8392  * dce8_average_bandwidth_vs_available_bandwidth - check
8393  * average and available bandwidth
8394  *
8395  * @wm: watermark calculation data
8396  *
8397  * Check if the display average bandwidth fits in the display
8398  * available bandwidth (CIK).
8399  * Used for display watermark bandwidth calculations
8400  * Returns true if the display fits, false if not.
8401  */
8402 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8403 {
8404         if (dce8_average_bandwidth(wm) <=
8405             (dce8_available_bandwidth(wm) / wm->num_heads))
8406                 return true;
8407         else
8408                 return false;
8409 }
8410
8411 /**
8412  * dce8_check_latency_hiding - check latency hiding
8413  *
8414  * @wm: watermark calculation data
8415  *
8416  * Check latency hiding (CIK).
8417  * Used for display watermark bandwidth calculations
8418  * Returns true if the display fits, false if not.
8419  */
8420 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8421 {
8422         u32 lb_partitions = wm->lb_size / wm->src_width;
8423         u32 line_time = wm->active_time + wm->blank_time;
8424         u32 latency_tolerant_lines;
8425         u32 latency_hiding;
8426         fixed20_12 a;
8427
8428         a.full = dfixed_const(1);
8429         if (wm->vsc.full > a.full)
8430                 latency_tolerant_lines = 1;
8431         else {
8432                 if (lb_partitions <= (wm->vtaps + 1))
8433                         latency_tolerant_lines = 1;
8434                 else
8435                         latency_tolerant_lines = 2;
8436         }
8437
8438         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8439
8440         if (dce8_latency_watermark(wm) <= latency_hiding)
8441                 return true;
8442         else
8443                 return false;
8444 }
8445
8446 /**
8447  * dce8_program_watermarks - program display watermarks
8448  *
8449  * @rdev: radeon_device pointer
8450  * @radeon_crtc: the selected display controller
8451  * @lb_size: line buffer size
8452  * @num_heads: number of display controllers in use
8453  *
8454  * Calculate and program the display watermarks for the
8455  * selected display controller (CIK).
8456  */
8457 static void dce8_program_watermarks(struct radeon_device *rdev,
8458                                     struct radeon_crtc *radeon_crtc,
8459                                     u32 lb_size, u32 num_heads)
8460 {
8461         struct drm_display_mode *mode = &radeon_crtc->base.mode;
8462         struct dce8_wm_params wm_low, wm_high;
8463         u32 pixel_period;
8464         u32 line_time = 0;
8465         u32 latency_watermark_a = 0, latency_watermark_b = 0;
8466         u32 tmp, wm_mask;
8467
8468         if (radeon_crtc->base.enabled && num_heads && mode) {
8469                 pixel_period = 1000000 / (u32)mode->clock;
8470                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8471
8472                 /* watermark for high clocks */
8473                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8474                     rdev->pm.dpm_enabled) {
8475                         wm_high.yclk =
8476                                 radeon_dpm_get_mclk(rdev, false) * 10;
8477                         wm_high.sclk =
8478                                 radeon_dpm_get_sclk(rdev, false) * 10;
8479                 } else {
8480                         wm_high.yclk = rdev->pm.current_mclk * 10;
8481                         wm_high.sclk = rdev->pm.current_sclk * 10;
8482                 }
8483
8484                 wm_high.disp_clk = mode->clock;
8485                 wm_high.src_width = mode->crtc_hdisplay;
8486                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8487                 wm_high.blank_time = line_time - wm_high.active_time;
8488                 wm_high.interlaced = false;
8489                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8490                         wm_high.interlaced = true;
8491                 wm_high.vsc = radeon_crtc->vsc;
8492                 wm_high.vtaps = 1;
8493                 if (radeon_crtc->rmx_type != RMX_OFF)
8494                         wm_high.vtaps = 2;
8495                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8496                 wm_high.lb_size = lb_size;
8497                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8498                 wm_high.num_heads = num_heads;
8499
8500                 /* set for high clocks */
8501                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8502
8503                 /* possibly force display priority to high */
8504                 /* should really do this at mode validation time... */
8505                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8506                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8507                     !dce8_check_latency_hiding(&wm_high) ||
8508                     (rdev->disp_priority == 2)) {
8509                         DRM_DEBUG_KMS("force priority to high\n");
8510                 }
8511
8512                 /* watermark for low clocks */
8513                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8514                     rdev->pm.dpm_enabled) {
8515                         wm_low.yclk =
8516                                 radeon_dpm_get_mclk(rdev, true) * 10;
8517                         wm_low.sclk =
8518                                 radeon_dpm_get_sclk(rdev, true) * 10;
8519                 } else {
8520                         wm_low.yclk = rdev->pm.current_mclk * 10;
8521                         wm_low.sclk = rdev->pm.current_sclk * 10;
8522                 }
8523
8524                 wm_low.disp_clk = mode->clock;
8525                 wm_low.src_width = mode->crtc_hdisplay;
8526                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8527                 wm_low.blank_time = line_time - wm_low.active_time;
8528                 wm_low.interlaced = false;
8529                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8530                         wm_low.interlaced = true;
8531                 wm_low.vsc = radeon_crtc->vsc;
8532                 wm_low.vtaps = 1;
8533                 if (radeon_crtc->rmx_type != RMX_OFF)
8534                         wm_low.vtaps = 2;
8535                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8536                 wm_low.lb_size = lb_size;
8537                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8538                 wm_low.num_heads = num_heads;
8539
8540                 /* set for low clocks */
8541                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8542
8543                 /* possibly force display priority to high */
8544                 /* should really do this at mode validation time... */
8545                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8546                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8547                     !dce8_check_latency_hiding(&wm_low) ||
8548                     (rdev->disp_priority == 2)) {
8549                         DRM_DEBUG_KMS("force priority to high\n");
8550                 }
8551         }
8552
8553         /* select wm A */
8554         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8555         tmp = wm_mask;
8556         tmp &= ~LATENCY_WATERMARK_MASK(3);
8557         tmp |= LATENCY_WATERMARK_MASK(1);
8558         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8559         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8560                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8561                 LATENCY_HIGH_WATERMARK(line_time)));
8562         /* select wm B */
8563         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8564         tmp &= ~LATENCY_WATERMARK_MASK(3);
8565         tmp |= LATENCY_WATERMARK_MASK(2);
8566         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8567         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8568                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8569                 LATENCY_HIGH_WATERMARK(line_time)));
8570         /* restore original selection */
8571         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8572
8573         /* save values for DPM */
8574         radeon_crtc->line_time = line_time;
8575         radeon_crtc->wm_high = latency_watermark_a;
8576         radeon_crtc->wm_low = latency_watermark_b;
8577 }
8578
8579 /**
8580  * dce8_bandwidth_update - program display watermarks
8581  *
8582  * @rdev: radeon_device pointer
8583  *
8584  * Calculate and program the display watermarks and line
8585  * buffer allocation (CIK).
8586  */
8587 void dce8_bandwidth_update(struct radeon_device *rdev)
8588 {
8589         struct drm_display_mode *mode = NULL;
8590         u32 num_heads = 0, lb_size;
8591         int i;
8592
8593         radeon_update_display_priority(rdev);
8594
8595         for (i = 0; i < rdev->num_crtc; i++) {
8596                 if (rdev->mode_info.crtcs[i]->base.enabled)
8597                         num_heads++;
8598         }
8599         for (i = 0; i < rdev->num_crtc; i++) {
8600                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8601                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8602                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8603         }
8604 }
8605
8606 /**
8607  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8608  *
8609  * @rdev: radeon_device pointer
8610  *
8611  * Fetches a GPU clock counter snapshot (SI).
8612  * Returns the 64 bit clock counter snapshot.
8613  */
8614 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8615 {
8616         uint64_t clock;
8617
8618         mutex_lock(&rdev->gpu_clock_mutex);
8619         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8620         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8621                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8622         mutex_unlock(&rdev->gpu_clock_mutex);
8623         return clock;
8624 }
8625
8626 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8627                               u32 cntl_reg, u32 status_reg)
8628 {
8629         int r, i;
8630         struct atom_clock_dividers dividers;
8631         uint32_t tmp;
8632
8633         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8634                                            clock, false, &dividers);
8635         if (r)
8636                 return r;
8637
8638         tmp = RREG32_SMC(cntl_reg);
8639         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8640         tmp |= dividers.post_divider;
8641         WREG32_SMC(cntl_reg, tmp);
8642
8643         for (i = 0; i < 100; i++) {
8644                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8645                         break;
8646                 mdelay(10);
8647         }
8648         if (i == 100)
8649                 return -ETIMEDOUT;
8650
8651         return 0;
8652 }
8653
8654 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8655 {
8656         int r = 0;
8657
8658         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8659         if (r)
8660                 return r;
8661
8662         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8663         return r;
8664 }
8665
/*
 * cik_pcie_gen3_enable - bump the PCIe link to gen2/gen3 speeds
 *
 * @rdev: radeon_device pointer
 *
 * Raises the PCIe link rate to the fastest speed both the GPU and the
 * platform advertise (gen2 or gen3).  Skipped for IGPs, non-PCIE parts,
 * or when disabled with radeon.pcie_gen2=0.  For a gen3 transition the
 * link equalization is re-run first via the bridge/GPU config space and
 * the PCIE_LC_CNTL4 quiesce/redo-EQ bits.  The register write ordering
 * below follows the hardware bring-up sequence — do not reorder.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* user opt-out via the radeon.pcie_gen2 module parameter */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs have no external PCIe link to retrain */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* what speeds does the platform (root port) support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is available */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* bail early if the link already runs at the target rate
	 * (current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3)
	 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets of both link partners */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save both link-control registers so the HAWD bit
			 * can be restored after the retraining loop
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			/* widen the link back up if it trained narrower than
			 * the detected maximum width
			 */
			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* redo equalization up to 10 times, or until no
			 * transactions are pending on the GPU
			 */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then request a redo of
				 * the gen3 equalization procedure
				 */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 — restore the saved bit 4 and bits 9-11
				 * (entry-compliance / compliance de-emphasis fields
				 * of Link Control 2; NOTE(review): magic masks,
				 * confirm against the PCIe spec)
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed (low 4 bits of Link Control 2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit; falls through
	 * silently on timeout
	 */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
8822
/*
 * cik_program_aspm - configure PCIe ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIe link power-saving features: L0s/L1 inactivity
 * timers, PLL power-down in L1, dynamic lane power states and the
 * CLKREQ#-based clock switching, gated on the local disable_* flags
 * (all currently hard-coded to false, i.e. everything enabled) and
 * the radeon.aspm module parameter.  Skipped for IGPs and non-PCIE
 * parts.  Every register is written read-modify-write and only when
 * the value actually changes.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* user opt-out via the radeon.aspm module parameter */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* advertise 0x24 fast training sequences on exit from L0s */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* set up the L0s/L1 inactivity timers; PMI_TO_L1_DIS is set here
	 * and cleared again below when L1 stays enabled
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PCIe PLLs (both PIF blocks, lanes 0-7 and
			 * 8-15) to power down in the OFF and TXS2 states
			 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			/* enable dynamic lane power state management */
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# usable only if the root port advertises
			 * clock power management in its link capabilities
			 */
			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch the thermal monitor and misc clocks to
				 * sources that survive the refclk being gated
				 * (NOTE(review): exact source meanings are from
				 * the register spec — confirm)
				 */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the timer setup from above as-is */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	/* if the N_FTS field saturated and the link is reversed in both
	 * directions, turn the L0s inactivity timer back off
	 */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}