Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[platform/adaptation/renesas_rcar/renesas_kernel.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
/*
 * Firmware images declared for this module via MODULE_FIRMWARE().
 * BONAIRE and HAWAII each list pfp, me, ce, mec, mc, rlc, sdma and smc
 * images; KAVERI and KABINI list the same set without mc and smc.
 */
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
52 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
53 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
58 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
59 MODULE_FIRMWARE("radeon/KABINI_me.bin");
60 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
61 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
62 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
63 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
64
/*
 * Helpers declared extern here instead of through a header.
 * NOTE(review): judging by their prefixes they presumably live in the
 * r600 / evergreen / sumo / si / cik_sdma sources -- confirm, and consider
 * moving these prototypes into a shared header.
 */
65 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
66 extern void r600_ih_ring_fini(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
70 extern void sumo_rlc_fini(struct radeon_device *rdev);
71 extern int sumo_rlc_init(struct radeon_device *rdev);
72 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
73 extern void si_rlc_reset(struct radeon_device *rdev);
74 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
75 extern int cik_sdma_resume(struct radeon_device *rdev);
76 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
77 extern void cik_sdma_fini(struct radeon_device *rdev);
/*
 * Forward declarations of file-local (static) helpers; their definitions
 * are not in this part of the file.
 */
78 static void cik_rlc_stop(struct radeon_device *rdev);
79 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
80 static void cik_program_aspm(struct radeon_device *rdev);
81 static void cik_init_pg(struct radeon_device *rdev);
82 static void cik_init_cg(struct radeon_device *rdev);
83 static void cik_fini_pg(struct radeon_device *rdev);
84 static void cik_fini_cg(struct radeon_device *rdev);
85 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
86                                           bool enable);
87
88 /* get temperature in millidegrees */
89 int ci_get_temp(struct radeon_device *rdev)
90 {
91         u32 temp;
92         int actual_temp = 0;
93
94         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
95                 CTF_TEMP_SHIFT;
96
97         if (temp & 0x200)
98                 actual_temp = 255;
99         else
100                 actual_temp = temp & 0x1ff;
101
102         actual_temp = actual_temp * 1000;
103
104         return actual_temp;
105 }
106
107 /* get temperature in millidegrees */
108 int kv_get_temp(struct radeon_device *rdev)
109 {
110         u32 temp;
111         int actual_temp = 0;
112
113         temp = RREG32_SMC(0xC0300E0C);
114
115         if (temp)
116                 actual_temp = (temp / 8) - 49;
117         else
118                 actual_temp = 0;
119
120         actual_temp = actual_temp * 1000;
121
122         return actual_temp;
123 }
124
125 /*
126  * Indirect registers accessor
127  */
128 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
129 {
130         unsigned long flags;
131         u32 r;
132
133         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
134         WREG32(PCIE_INDEX, reg);
135         (void)RREG32(PCIE_INDEX);
136         r = RREG32(PCIE_DATA);
137         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
138         return r;
139 }
140
141 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
142 {
143         unsigned long flags;
144
145         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
146         WREG32(PCIE_INDEX, reg);
147         (void)RREG32(PCIE_INDEX);
148         WREG32(PCIE_DATA, v);
149         (void)RREG32(PCIE_DATA);
150         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
151 }
152
/*
 * Judging by its name, the RLC save/restore register list for the
 * "spectre" variant.  NOTE(review): the code that consumes this table is
 * not in this part of the file -- confirm which ASICs select it.
 *
 * Layout as written below:
 *  - most entries are two words: (select << 16) | (byte_offset >> 2),
 *    followed by a 0x00000000 word;
 *  - bare 0x3 and 0x5 words separate sections of the list;
 *  - the five entries after 0x5 have no trailing zero word.
 *
 * NOTE(review): the meaning of the upper-half select values (0x0e00,
 * 0x4e00..0xbe00, 0x0600, 0x0400, 0x0000/0x0001) and of the 0x3/0x5 words
 * is not established here -- presumably instance/broadcast selectors and
 * section markers; verify against the RLC documentation before editing.
 */
153 static const u32 spectre_rlc_save_restore_register_list[] =
154 {
155         (0x0e00 << 16) | (0xc12c >> 2),
156         0x00000000,
157         (0x0e00 << 16) | (0xc140 >> 2),
158         0x00000000,
159         (0x0e00 << 16) | (0xc150 >> 2),
160         0x00000000,
161         (0x0e00 << 16) | (0xc15c >> 2),
162         0x00000000,
163         (0x0e00 << 16) | (0xc168 >> 2),
164         0x00000000,
165         (0x0e00 << 16) | (0xc170 >> 2),
166         0x00000000,
167         (0x0e00 << 16) | (0xc178 >> 2),
168         0x00000000,
169         (0x0e00 << 16) | (0xc204 >> 2),
170         0x00000000,
171         (0x0e00 << 16) | (0xc2b4 >> 2),
172         0x00000000,
173         (0x0e00 << 16) | (0xc2b8 >> 2),
174         0x00000000,
175         (0x0e00 << 16) | (0xc2bc >> 2),
176         0x00000000,
177         (0x0e00 << 16) | (0xc2c0 >> 2),
178         0x00000000,
179         (0x0e00 << 16) | (0x8228 >> 2),
180         0x00000000,
181         (0x0e00 << 16) | (0x829c >> 2),
182         0x00000000,
183         (0x0e00 << 16) | (0x869c >> 2),
184         0x00000000,
185         (0x0600 << 16) | (0x98f4 >> 2),
186         0x00000000,
187         (0x0e00 << 16) | (0x98f8 >> 2),
188         0x00000000,
189         (0x0e00 << 16) | (0x9900 >> 2),
190         0x00000000,
191         (0x0e00 << 16) | (0xc260 >> 2),
192         0x00000000,
193         (0x0e00 << 16) | (0x90e8 >> 2),
194         0x00000000,
195         (0x0e00 << 16) | (0x3c000 >> 2),
196         0x00000000,
197         (0x0e00 << 16) | (0x3c00c >> 2),
198         0x00000000,
199         (0x0e00 << 16) | (0x8c1c >> 2),
200         0x00000000,
201         (0x0e00 << 16) | (0x9700 >> 2),
202         0x00000000,
203         (0x0e00 << 16) | (0xcd20 >> 2),
204         0x00000000,
205         (0x4e00 << 16) | (0xcd20 >> 2),
206         0x00000000,
207         (0x5e00 << 16) | (0xcd20 >> 2),
208         0x00000000,
209         (0x6e00 << 16) | (0xcd20 >> 2),
210         0x00000000,
211         (0x7e00 << 16) | (0xcd20 >> 2),
212         0x00000000,
213         (0x8e00 << 16) | (0xcd20 >> 2),
214         0x00000000,
215         (0x9e00 << 16) | (0xcd20 >> 2),
216         0x00000000,
217         (0xae00 << 16) | (0xcd20 >> 2),
218         0x00000000,
219         (0xbe00 << 16) | (0xcd20 >> 2),
220         0x00000000,
221         (0x0e00 << 16) | (0x89bc >> 2),
222         0x00000000,
223         (0x0e00 << 16) | (0x8900 >> 2),
224         0x00000000,
225         0x3,
226         (0x0e00 << 16) | (0xc130 >> 2),
227         0x00000000,
228         (0x0e00 << 16) | (0xc134 >> 2),
229         0x00000000,
230         (0x0e00 << 16) | (0xc1fc >> 2),
231         0x00000000,
232         (0x0e00 << 16) | (0xc208 >> 2),
233         0x00000000,
234         (0x0e00 << 16) | (0xc264 >> 2),
235         0x00000000,
236         (0x0e00 << 16) | (0xc268 >> 2),
237         0x00000000,
238         (0x0e00 << 16) | (0xc26c >> 2),
239         0x00000000,
240         (0x0e00 << 16) | (0xc270 >> 2),
241         0x00000000,
242         (0x0e00 << 16) | (0xc274 >> 2),
243         0x00000000,
244         (0x0e00 << 16) | (0xc278 >> 2),
245         0x00000000,
246         (0x0e00 << 16) | (0xc27c >> 2),
247         0x00000000,
248         (0x0e00 << 16) | (0xc280 >> 2),
249         0x00000000,
250         (0x0e00 << 16) | (0xc284 >> 2),
251         0x00000000,
252         (0x0e00 << 16) | (0xc288 >> 2),
253         0x00000000,
254         (0x0e00 << 16) | (0xc28c >> 2),
255         0x00000000,
256         (0x0e00 << 16) | (0xc290 >> 2),
257         0x00000000,
258         (0x0e00 << 16) | (0xc294 >> 2),
259         0x00000000,
260         (0x0e00 << 16) | (0xc298 >> 2),
261         0x00000000,
262         (0x0e00 << 16) | (0xc29c >> 2),
263         0x00000000,
264         (0x0e00 << 16) | (0xc2a0 >> 2),
265         0x00000000,
266         (0x0e00 << 16) | (0xc2a4 >> 2),
267         0x00000000,
268         (0x0e00 << 16) | (0xc2a8 >> 2),
269         0x00000000,
270         (0x0e00 << 16) | (0xc2ac  >> 2),
271         0x00000000,
272         (0x0e00 << 16) | (0xc2b0 >> 2),
273         0x00000000,
274         (0x0e00 << 16) | (0x301d0 >> 2),
275         0x00000000,
276         (0x0e00 << 16) | (0x30238 >> 2),
277         0x00000000,
278         (0x0e00 << 16) | (0x30250 >> 2),
279         0x00000000,
280         (0x0e00 << 16) | (0x30254 >> 2),
281         0x00000000,
282         (0x0e00 << 16) | (0x30258 >> 2),
283         0x00000000,
284         (0x0e00 << 16) | (0x3025c >> 2),
285         0x00000000,
286         (0x4e00 << 16) | (0xc900 >> 2),
287         0x00000000,
288         (0x5e00 << 16) | (0xc900 >> 2),
289         0x00000000,
290         (0x6e00 << 16) | (0xc900 >> 2),
291         0x00000000,
292         (0x7e00 << 16) | (0xc900 >> 2),
293         0x00000000,
294         (0x8e00 << 16) | (0xc900 >> 2),
295         0x00000000,
296         (0x9e00 << 16) | (0xc900 >> 2),
297         0x00000000,
298         (0xae00 << 16) | (0xc900 >> 2),
299         0x00000000,
300         (0xbe00 << 16) | (0xc900 >> 2),
301         0x00000000,
302         (0x4e00 << 16) | (0xc904 >> 2),
303         0x00000000,
304         (0x5e00 << 16) | (0xc904 >> 2),
305         0x00000000,
306         (0x6e00 << 16) | (0xc904 >> 2),
307         0x00000000,
308         (0x7e00 << 16) | (0xc904 >> 2),
309         0x00000000,
310         (0x8e00 << 16) | (0xc904 >> 2),
311         0x00000000,
312         (0x9e00 << 16) | (0xc904 >> 2),
313         0x00000000,
314         (0xae00 << 16) | (0xc904 >> 2),
315         0x00000000,
316         (0xbe00 << 16) | (0xc904 >> 2),
317         0x00000000,
318         (0x4e00 << 16) | (0xc908 >> 2),
319         0x00000000,
320         (0x5e00 << 16) | (0xc908 >> 2),
321         0x00000000,
322         (0x6e00 << 16) | (0xc908 >> 2),
323         0x00000000,
324         (0x7e00 << 16) | (0xc908 >> 2),
325         0x00000000,
326         (0x8e00 << 16) | (0xc908 >> 2),
327         0x00000000,
328         (0x9e00 << 16) | (0xc908 >> 2),
329         0x00000000,
330         (0xae00 << 16) | (0xc908 >> 2),
331         0x00000000,
332         (0xbe00 << 16) | (0xc908 >> 2),
333         0x00000000,
334         (0x4e00 << 16) | (0xc90c >> 2),
335         0x00000000,
336         (0x5e00 << 16) | (0xc90c >> 2),
337         0x00000000,
338         (0x6e00 << 16) | (0xc90c >> 2),
339         0x00000000,
340         (0x7e00 << 16) | (0xc90c >> 2),
341         0x00000000,
342         (0x8e00 << 16) | (0xc90c >> 2),
343         0x00000000,
344         (0x9e00 << 16) | (0xc90c >> 2),
345         0x00000000,
346         (0xae00 << 16) | (0xc90c >> 2),
347         0x00000000,
348         (0xbe00 << 16) | (0xc90c >> 2),
349         0x00000000,
350         (0x4e00 << 16) | (0xc910 >> 2),
351         0x00000000,
352         (0x5e00 << 16) | (0xc910 >> 2),
353         0x00000000,
354         (0x6e00 << 16) | (0xc910 >> 2),
355         0x00000000,
356         (0x7e00 << 16) | (0xc910 >> 2),
357         0x00000000,
358         (0x8e00 << 16) | (0xc910 >> 2),
359         0x00000000,
360         (0x9e00 << 16) | (0xc910 >> 2),
361         0x00000000,
362         (0xae00 << 16) | (0xc910 >> 2),
363         0x00000000,
364         (0xbe00 << 16) | (0xc910 >> 2),
365         0x00000000,
366         (0x0e00 << 16) | (0xc99c >> 2),
367         0x00000000,
368         (0x0e00 << 16) | (0x9834 >> 2),
369         0x00000000,
370         (0x0000 << 16) | (0x30f00 >> 2),
371         0x00000000,
372         (0x0001 << 16) | (0x30f00 >> 2),
373         0x00000000,
374         (0x0000 << 16) | (0x30f04 >> 2),
375         0x00000000,
376         (0x0001 << 16) | (0x30f04 >> 2),
377         0x00000000,
378         (0x0000 << 16) | (0x30f08 >> 2),
379         0x00000000,
380         (0x0001 << 16) | (0x30f08 >> 2),
381         0x00000000,
382         (0x0000 << 16) | (0x30f0c >> 2),
383         0x00000000,
384         (0x0001 << 16) | (0x30f0c >> 2),
385         0x00000000,
386         (0x0600 << 16) | (0x9b7c >> 2),
387         0x00000000,
388         (0x0e00 << 16) | (0x8a14 >> 2),
389         0x00000000,
390         (0x0e00 << 16) | (0x8a18 >> 2),
391         0x00000000,
392         (0x0600 << 16) | (0x30a00 >> 2),
393         0x00000000,
394         (0x0e00 << 16) | (0x8bf0 >> 2),
395         0x00000000,
396         (0x0e00 << 16) | (0x8bcc >> 2),
397         0x00000000,
398         (0x0e00 << 16) | (0x8b24 >> 2),
399         0x00000000,
400         (0x0e00 << 16) | (0x30a04 >> 2),
401         0x00000000,
402         (0x0600 << 16) | (0x30a10 >> 2),
403         0x00000000,
404         (0x0600 << 16) | (0x30a14 >> 2),
405         0x00000000,
406         (0x0600 << 16) | (0x30a18 >> 2),
407         0x00000000,
408         (0x0600 << 16) | (0x30a2c >> 2),
409         0x00000000,
410         (0x0e00 << 16) | (0xc700 >> 2),
411         0x00000000,
412         (0x0e00 << 16) | (0xc704 >> 2),
413         0x00000000,
414         (0x0e00 << 16) | (0xc708 >> 2),
415         0x00000000,
416         (0x0e00 << 16) | (0xc768 >> 2),
417         0x00000000,
418         (0x0400 << 16) | (0xc770 >> 2),
419         0x00000000,
420         (0x0400 << 16) | (0xc774 >> 2),
421         0x00000000,
422         (0x0400 << 16) | (0xc778 >> 2),
423         0x00000000,
424         (0x0400 << 16) | (0xc77c >> 2),
425         0x00000000,
426         (0x0400 << 16) | (0xc780 >> 2),
427         0x00000000,
428         (0x0400 << 16) | (0xc784 >> 2),
429         0x00000000,
430         (0x0400 << 16) | (0xc788 >> 2),
431         0x00000000,
432         (0x0400 << 16) | (0xc78c >> 2),
433         0x00000000,
434         (0x0400 << 16) | (0xc798 >> 2),
435         0x00000000,
436         (0x0400 << 16) | (0xc79c >> 2),
437         0x00000000,
438         (0x0400 << 16) | (0xc7a0 >> 2),
439         0x00000000,
440         (0x0400 << 16) | (0xc7a4 >> 2),
441         0x00000000,
442         (0x0400 << 16) | (0xc7a8 >> 2),
443         0x00000000,
444         (0x0400 << 16) | (0xc7ac >> 2),
445         0x00000000,
446         (0x0400 << 16) | (0xc7b0 >> 2),
447         0x00000000,
448         (0x0400 << 16) | (0xc7b4 >> 2),
449         0x00000000,
450         (0x0e00 << 16) | (0x9100 >> 2),
451         0x00000000,
452         (0x0e00 << 16) | (0x3c010 >> 2),
453         0x00000000,
454         (0x0e00 << 16) | (0x92a8 >> 2),
455         0x00000000,
456         (0x0e00 << 16) | (0x92ac >> 2),
457         0x00000000,
458         (0x0e00 << 16) | (0x92b4 >> 2),
459         0x00000000,
460         (0x0e00 << 16) | (0x92b8 >> 2),
461         0x00000000,
462         (0x0e00 << 16) | (0x92bc >> 2),
463         0x00000000,
464         (0x0e00 << 16) | (0x92c0 >> 2),
465         0x00000000,
466         (0x0e00 << 16) | (0x92c4 >> 2),
467         0x00000000,
468         (0x0e00 << 16) | (0x92c8 >> 2),
469         0x00000000,
470         (0x0e00 << 16) | (0x92cc >> 2),
471         0x00000000,
472         (0x0e00 << 16) | (0x92d0 >> 2),
473         0x00000000,
474         (0x0e00 << 16) | (0x8c00 >> 2),
475         0x00000000,
476         (0x0e00 << 16) | (0x8c04 >> 2),
477         0x00000000,
478         (0x0e00 << 16) | (0x8c20 >> 2),
479         0x00000000,
480         (0x0e00 << 16) | (0x8c38 >> 2),
481         0x00000000,
482         (0x0e00 << 16) | (0x8c3c >> 2),
483         0x00000000,
484         (0x0e00 << 16) | (0xae00 >> 2),
485         0x00000000,
486         (0x0e00 << 16) | (0x9604 >> 2),
487         0x00000000,
488         (0x0e00 << 16) | (0xac08 >> 2),
489         0x00000000,
490         (0x0e00 << 16) | (0xac0c >> 2),
491         0x00000000,
492         (0x0e00 << 16) | (0xac10 >> 2),
493         0x00000000,
494         (0x0e00 << 16) | (0xac14 >> 2),
495         0x00000000,
496         (0x0e00 << 16) | (0xac58 >> 2),
497         0x00000000,
498         (0x0e00 << 16) | (0xac68 >> 2),
499         0x00000000,
500         (0x0e00 << 16) | (0xac6c >> 2),
501         0x00000000,
502         (0x0e00 << 16) | (0xac70 >> 2),
503         0x00000000,
504         (0x0e00 << 16) | (0xac74 >> 2),
505         0x00000000,
506         (0x0e00 << 16) | (0xac78 >> 2),
507         0x00000000,
508         (0x0e00 << 16) | (0xac7c >> 2),
509         0x00000000,
510         (0x0e00 << 16) | (0xac80 >> 2),
511         0x00000000,
512         (0x0e00 << 16) | (0xac84 >> 2),
513         0x00000000,
514         (0x0e00 << 16) | (0xac88 >> 2),
515         0x00000000,
516         (0x0e00 << 16) | (0xac8c >> 2),
517         0x00000000,
518         (0x0e00 << 16) | (0x970c >> 2),
519         0x00000000,
520         (0x0e00 << 16) | (0x9714 >> 2),
521         0x00000000,
522         (0x0e00 << 16) | (0x9718 >> 2),
523         0x00000000,
524         (0x0e00 << 16) | (0x971c >> 2),
525         0x00000000,
526         (0x0e00 << 16) | (0x31068 >> 2),
527         0x00000000,
528         (0x4e00 << 16) | (0x31068 >> 2),
529         0x00000000,
530         (0x5e00 << 16) | (0x31068 >> 2),
531         0x00000000,
532         (0x6e00 << 16) | (0x31068 >> 2),
533         0x00000000,
534         (0x7e00 << 16) | (0x31068 >> 2),
535         0x00000000,
536         (0x8e00 << 16) | (0x31068 >> 2),
537         0x00000000,
538         (0x9e00 << 16) | (0x31068 >> 2),
539         0x00000000,
540         (0xae00 << 16) | (0x31068 >> 2),
541         0x00000000,
542         (0xbe00 << 16) | (0x31068 >> 2),
543         0x00000000,
544         (0x0e00 << 16) | (0xcd10 >> 2),
545         0x00000000,
546         (0x0e00 << 16) | (0xcd14 >> 2),
547         0x00000000,
548         (0x0e00 << 16) | (0x88b0 >> 2),
549         0x00000000,
550         (0x0e00 << 16) | (0x88b4 >> 2),
551         0x00000000,
552         (0x0e00 << 16) | (0x88b8 >> 2),
553         0x00000000,
554         (0x0e00 << 16) | (0x88bc >> 2),
555         0x00000000,
556         (0x0400 << 16) | (0x89c0 >> 2),
557         0x00000000,
558         (0x0e00 << 16) | (0x88c4 >> 2),
559         0x00000000,
560         (0x0e00 << 16) | (0x88c8 >> 2),
561         0x00000000,
562         (0x0e00 << 16) | (0x88d0 >> 2),
563         0x00000000,
564         (0x0e00 << 16) | (0x88d4 >> 2),
565         0x00000000,
566         (0x0e00 << 16) | (0x88d8 >> 2),
567         0x00000000,
568         (0x0e00 << 16) | (0x8980 >> 2),
569         0x00000000,
570         (0x0e00 << 16) | (0x30938 >> 2),
571         0x00000000,
572         (0x0e00 << 16) | (0x3093c >> 2),
573         0x00000000,
574         (0x0e00 << 16) | (0x30940 >> 2),
575         0x00000000,
576         (0x0e00 << 16) | (0x89a0 >> 2),
577         0x00000000,
578         (0x0e00 << 16) | (0x30900 >> 2),
579         0x00000000,
580         (0x0e00 << 16) | (0x30904 >> 2),
581         0x00000000,
582         (0x0e00 << 16) | (0x89b4 >> 2),
583         0x00000000,
584         (0x0e00 << 16) | (0x3c210 >> 2),
585         0x00000000,
586         (0x0e00 << 16) | (0x3c214 >> 2),
587         0x00000000,
588         (0x0e00 << 16) | (0x3c218 >> 2),
589         0x00000000,
590         (0x0e00 << 16) | (0x8904 >> 2),
591         0x00000000,
592         0x5,
593         (0x0e00 << 16) | (0x8c28 >> 2),
594         (0x0e00 << 16) | (0x8c2c >> 2),
595         (0x0e00 << 16) | (0x8c30 >> 2),
596         (0x0e00 << 16) | (0x8c34 >> 2),
597         (0x0e00 << 16) | (0x9600 >> 2),
598 };
599
/*
 * Judging by its name, the RLC save/restore register list for the
 * "kalindi" variant.  NOTE(review): the code that consumes this table is
 * not in this part of the file -- confirm which ASICs select it.
 *
 * Same visible layout as the spectre list: two-word entries of
 * (select << 16) | (byte_offset >> 2) followed by 0x00000000, bare 0x3 and
 * 0x5 words between sections, and five single-word entries after 0x5.
 * Compared with the spectre list, the per-select groups here only use
 * 0x4e00..0x7e00 (no 0x8e00..0xbe00), several offsets are absent, and
 * offset 0x3e1fc appears only in this list.
 *
 * NOTE(review): the meaning of the select values and of the 0x3/0x5 words
 * is not established here; verify against the RLC documentation before
 * editing.
 */
600 static const u32 kalindi_rlc_save_restore_register_list[] =
601 {
602         (0x0e00 << 16) | (0xc12c >> 2),
603         0x00000000,
604         (0x0e00 << 16) | (0xc140 >> 2),
605         0x00000000,
606         (0x0e00 << 16) | (0xc150 >> 2),
607         0x00000000,
608         (0x0e00 << 16) | (0xc15c >> 2),
609         0x00000000,
610         (0x0e00 << 16) | (0xc168 >> 2),
611         0x00000000,
612         (0x0e00 << 16) | (0xc170 >> 2),
613         0x00000000,
614         (0x0e00 << 16) | (0xc204 >> 2),
615         0x00000000,
616         (0x0e00 << 16) | (0xc2b4 >> 2),
617         0x00000000,
618         (0x0e00 << 16) | (0xc2b8 >> 2),
619         0x00000000,
620         (0x0e00 << 16) | (0xc2bc >> 2),
621         0x00000000,
622         (0x0e00 << 16) | (0xc2c0 >> 2),
623         0x00000000,
624         (0x0e00 << 16) | (0x8228 >> 2),
625         0x00000000,
626         (0x0e00 << 16) | (0x829c >> 2),
627         0x00000000,
628         (0x0e00 << 16) | (0x869c >> 2),
629         0x00000000,
630         (0x0600 << 16) | (0x98f4 >> 2),
631         0x00000000,
632         (0x0e00 << 16) | (0x98f8 >> 2),
633         0x00000000,
634         (0x0e00 << 16) | (0x9900 >> 2),
635         0x00000000,
636         (0x0e00 << 16) | (0xc260 >> 2),
637         0x00000000,
638         (0x0e00 << 16) | (0x90e8 >> 2),
639         0x00000000,
640         (0x0e00 << 16) | (0x3c000 >> 2),
641         0x00000000,
642         (0x0e00 << 16) | (0x3c00c >> 2),
643         0x00000000,
644         (0x0e00 << 16) | (0x8c1c >> 2),
645         0x00000000,
646         (0x0e00 << 16) | (0x9700 >> 2),
647         0x00000000,
648         (0x0e00 << 16) | (0xcd20 >> 2),
649         0x00000000,
650         (0x4e00 << 16) | (0xcd20 >> 2),
651         0x00000000,
652         (0x5e00 << 16) | (0xcd20 >> 2),
653         0x00000000,
654         (0x6e00 << 16) | (0xcd20 >> 2),
655         0x00000000,
656         (0x7e00 << 16) | (0xcd20 >> 2),
657         0x00000000,
658         (0x0e00 << 16) | (0x89bc >> 2),
659         0x00000000,
660         (0x0e00 << 16) | (0x8900 >> 2),
661         0x00000000,
662         0x3,
663         (0x0e00 << 16) | (0xc130 >> 2),
664         0x00000000,
665         (0x0e00 << 16) | (0xc134 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0xc1fc >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0xc208 >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0xc264 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0xc268 >> 2),
674         0x00000000,
675         (0x0e00 << 16) | (0xc26c >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0xc270 >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0xc274 >> 2),
680         0x00000000,
681         (0x0e00 << 16) | (0xc28c >> 2),
682         0x00000000,
683         (0x0e00 << 16) | (0xc290 >> 2),
684         0x00000000,
685         (0x0e00 << 16) | (0xc294 >> 2),
686         0x00000000,
687         (0x0e00 << 16) | (0xc298 >> 2),
688         0x00000000,
689         (0x0e00 << 16) | (0xc2a0 >> 2),
690         0x00000000,
691         (0x0e00 << 16) | (0xc2a4 >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0xc2a8 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0xc2ac >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0x301d0 >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0x30238 >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0x30250 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0x30254 >> 2),
704         0x00000000,
705         (0x0e00 << 16) | (0x30258 >> 2),
706         0x00000000,
707         (0x0e00 << 16) | (0x3025c >> 2),
708         0x00000000,
709         (0x4e00 << 16) | (0xc900 >> 2),
710         0x00000000,
711         (0x5e00 << 16) | (0xc900 >> 2),
712         0x00000000,
713         (0x6e00 << 16) | (0xc900 >> 2),
714         0x00000000,
715         (0x7e00 << 16) | (0xc900 >> 2),
716         0x00000000,
717         (0x4e00 << 16) | (0xc904 >> 2),
718         0x00000000,
719         (0x5e00 << 16) | (0xc904 >> 2),
720         0x00000000,
721         (0x6e00 << 16) | (0xc904 >> 2),
722         0x00000000,
723         (0x7e00 << 16) | (0xc904 >> 2),
724         0x00000000,
725         (0x4e00 << 16) | (0xc908 >> 2),
726         0x00000000,
727         (0x5e00 << 16) | (0xc908 >> 2),
728         0x00000000,
729         (0x6e00 << 16) | (0xc908 >> 2),
730         0x00000000,
731         (0x7e00 << 16) | (0xc908 >> 2),
732         0x00000000,
733         (0x4e00 << 16) | (0xc90c >> 2),
734         0x00000000,
735         (0x5e00 << 16) | (0xc90c >> 2),
736         0x00000000,
737         (0x6e00 << 16) | (0xc90c >> 2),
738         0x00000000,
739         (0x7e00 << 16) | (0xc90c >> 2),
740         0x00000000,
741         (0x4e00 << 16) | (0xc910 >> 2),
742         0x00000000,
743         (0x5e00 << 16) | (0xc910 >> 2),
744         0x00000000,
745         (0x6e00 << 16) | (0xc910 >> 2),
746         0x00000000,
747         (0x7e00 << 16) | (0xc910 >> 2),
748         0x00000000,
749         (0x0e00 << 16) | (0xc99c >> 2),
750         0x00000000,
751         (0x0e00 << 16) | (0x9834 >> 2),
752         0x00000000,
753         (0x0000 << 16) | (0x30f00 >> 2),
754         0x00000000,
755         (0x0000 << 16) | (0x30f04 >> 2),
756         0x00000000,
757         (0x0000 << 16) | (0x30f08 >> 2),
758         0x00000000,
759         (0x0000 << 16) | (0x30f0c >> 2),
760         0x00000000,
761         (0x0600 << 16) | (0x9b7c >> 2),
762         0x00000000,
763         (0x0e00 << 16) | (0x8a14 >> 2),
764         0x00000000,
765         (0x0e00 << 16) | (0x8a18 >> 2),
766         0x00000000,
767         (0x0600 << 16) | (0x30a00 >> 2),
768         0x00000000,
769         (0x0e00 << 16) | (0x8bf0 >> 2),
770         0x00000000,
771         (0x0e00 << 16) | (0x8bcc >> 2),
772         0x00000000,
773         (0x0e00 << 16) | (0x8b24 >> 2),
774         0x00000000,
775         (0x0e00 << 16) | (0x30a04 >> 2),
776         0x00000000,
777         (0x0600 << 16) | (0x30a10 >> 2),
778         0x00000000,
779         (0x0600 << 16) | (0x30a14 >> 2),
780         0x00000000,
781         (0x0600 << 16) | (0x30a18 >> 2),
782         0x00000000,
783         (0x0600 << 16) | (0x30a2c >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc700 >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc704 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc708 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc768 >> 2),
792         0x00000000,
793         (0x0400 << 16) | (0xc770 >> 2),
794         0x00000000,
795         (0x0400 << 16) | (0xc774 >> 2),
796         0x00000000,
797         (0x0400 << 16) | (0xc798 >> 2),
798         0x00000000,
799         (0x0400 << 16) | (0xc79c >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0x9100 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0x3c010 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0x8c00 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0x8c04 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0x8c20 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0x8c38 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x8c3c >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0xae00 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x9604 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0xac08 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0xac0c >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0xac10 >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0xac14 >> 2),
826         0x00000000,
827         (0x0e00 << 16) | (0xac58 >> 2),
828         0x00000000,
829         (0x0e00 << 16) | (0xac68 >> 2),
830         0x00000000,
831         (0x0e00 << 16) | (0xac6c >> 2),
832         0x00000000,
833         (0x0e00 << 16) | (0xac70 >> 2),
834         0x00000000,
835         (0x0e00 << 16) | (0xac74 >> 2),
836         0x00000000,
837         (0x0e00 << 16) | (0xac78 >> 2),
838         0x00000000,
839         (0x0e00 << 16) | (0xac7c >> 2),
840         0x00000000,
841         (0x0e00 << 16) | (0xac80 >> 2),
842         0x00000000,
843         (0x0e00 << 16) | (0xac84 >> 2),
844         0x00000000,
845         (0x0e00 << 16) | (0xac88 >> 2),
846         0x00000000,
847         (0x0e00 << 16) | (0xac8c >> 2),
848         0x00000000,
849         (0x0e00 << 16) | (0x970c >> 2),
850         0x00000000,
851         (0x0e00 << 16) | (0x9714 >> 2),
852         0x00000000,
853         (0x0e00 << 16) | (0x9718 >> 2),
854         0x00000000,
855         (0x0e00 << 16) | (0x971c >> 2),
856         0x00000000,
857         (0x0e00 << 16) | (0x31068 >> 2),
858         0x00000000,
859         (0x4e00 << 16) | (0x31068 >> 2),
860         0x00000000,
861         (0x5e00 << 16) | (0x31068 >> 2),
862         0x00000000,
863         (0x6e00 << 16) | (0x31068 >> 2),
864         0x00000000,
865         (0x7e00 << 16) | (0x31068 >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0xcd10 >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0xcd14 >> 2),
870         0x00000000,
871         (0x0e00 << 16) | (0x88b0 >> 2),
872         0x00000000,
873         (0x0e00 << 16) | (0x88b4 >> 2),
874         0x00000000,
875         (0x0e00 << 16) | (0x88b8 >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x88bc >> 2),
878         0x00000000,
879         (0x0400 << 16) | (0x89c0 >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x88c4 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x88c8 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x88d0 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x88d4 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x88d8 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x8980 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0x30938 >> 2),
894         0x00000000,
895         (0x0e00 << 16) | (0x3093c >> 2),
896         0x00000000,
897         (0x0e00 << 16) | (0x30940 >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0x89a0 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0x30900 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0x30904 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0x89b4 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0x3e1fc >> 2),
908         0x00000000,
909         (0x0e00 << 16) | (0x3c210 >> 2),
910         0x00000000,
911         (0x0e00 << 16) | (0x3c214 >> 2),
912         0x00000000,
913         (0x0e00 << 16) | (0x3c218 >> 2),
914         0x00000000,
915         (0x0e00 << 16) | (0x8904 >> 2),
916         0x00000000,
917         0x5,
918         (0x0e00 << 16) | (0x8c28 >> 2),
919         (0x0e00 << 16) | (0x8c2c >> 2),
920         (0x0e00 << 16) | (0x8c30 >> 2),
921         (0x0e00 << 16) | (0x8c34 >> 2),
922         (0x0e00 << 16) | (0x9600 >> 2),
923 };
924
925 static const u32 bonaire_golden_spm_registers[] =
926 {
927         0x30800, 0xe0ffffff, 0xe0000000
928 };
929
930 static const u32 bonaire_golden_common_registers[] =
931 {
932         0xc770, 0xffffffff, 0x00000800,
933         0xc774, 0xffffffff, 0x00000800,
934         0xc798, 0xffffffff, 0x00007fbf,
935         0xc79c, 0xffffffff, 0x00007faf
936 };
937
938 static const u32 bonaire_golden_registers[] =
939 {
940         0x3354, 0x00000333, 0x00000333,
941         0x3350, 0x000c0fc0, 0x00040200,
942         0x9a10, 0x00010000, 0x00058208,
943         0x3c000, 0xffff1fff, 0x00140000,
944         0x3c200, 0xfdfc0fff, 0x00000100,
945         0x3c234, 0x40000000, 0x40000200,
946         0x9830, 0xffffffff, 0x00000000,
947         0x9834, 0xf00fffff, 0x00000400,
948         0x9838, 0x0002021c, 0x00020200,
949         0xc78, 0x00000080, 0x00000000,
950         0x5bb0, 0x000000f0, 0x00000070,
951         0x5bc0, 0xf0311fff, 0x80300000,
952         0x98f8, 0x73773777, 0x12010001,
953         0x350c, 0x00810000, 0x408af000,
954         0x7030, 0x31000111, 0x00000011,
955         0x2f48, 0x73773777, 0x12010001,
956         0x220c, 0x00007fb6, 0x0021a1b1,
957         0x2210, 0x00007fb6, 0x002021b1,
958         0x2180, 0x00007fb6, 0x00002191,
959         0x2218, 0x00007fb6, 0x002121b1,
960         0x221c, 0x00007fb6, 0x002021b1,
961         0x21dc, 0x00007fb6, 0x00002191,
962         0x21e0, 0x00007fb6, 0x00002191,
963         0x3628, 0x0000003f, 0x0000000a,
964         0x362c, 0x0000003f, 0x0000000a,
965         0x2ae4, 0x00073ffe, 0x000022a2,
966         0x240c, 0x000007ff, 0x00000000,
967         0x8a14, 0xf000003f, 0x00000007,
968         0x8bf0, 0x00002001, 0x00000001,
969         0x8b24, 0xffffffff, 0x00ffffff,
970         0x30a04, 0x0000ff0f, 0x00000000,
971         0x28a4c, 0x07ffffff, 0x06000000,
972         0x4d8, 0x00000fff, 0x00000100,
973         0x3e78, 0x00000001, 0x00000002,
974         0x9100, 0x03000000, 0x0362c688,
975         0x8c00, 0x000000ff, 0x00000001,
976         0xe40, 0x00001fff, 0x00001fff,
977         0x9060, 0x0000007f, 0x00000020,
978         0x9508, 0x00010000, 0x00010000,
979         0xac14, 0x000003ff, 0x000000f3,
980         0xac0c, 0xffffffff, 0x00001032
981 };
982
983 static const u32 bonaire_mgcg_cgcg_init[] =
984 {
985         0xc420, 0xffffffff, 0xfffffffc,
986         0x30800, 0xffffffff, 0xe0000000,
987         0x3c2a0, 0xffffffff, 0x00000100,
988         0x3c208, 0xffffffff, 0x00000100,
989         0x3c2c0, 0xffffffff, 0xc0000100,
990         0x3c2c8, 0xffffffff, 0xc0000100,
991         0x3c2c4, 0xffffffff, 0xc0000100,
992         0x55e4, 0xffffffff, 0x00600100,
993         0x3c280, 0xffffffff, 0x00000100,
994         0x3c214, 0xffffffff, 0x06000100,
995         0x3c220, 0xffffffff, 0x00000100,
996         0x3c218, 0xffffffff, 0x06000100,
997         0x3c204, 0xffffffff, 0x00000100,
998         0x3c2e0, 0xffffffff, 0x00000100,
999         0x3c224, 0xffffffff, 0x00000100,
1000         0x3c200, 0xffffffff, 0x00000100,
1001         0x3c230, 0xffffffff, 0x00000100,
1002         0x3c234, 0xffffffff, 0x00000100,
1003         0x3c250, 0xffffffff, 0x00000100,
1004         0x3c254, 0xffffffff, 0x00000100,
1005         0x3c258, 0xffffffff, 0x00000100,
1006         0x3c25c, 0xffffffff, 0x00000100,
1007         0x3c260, 0xffffffff, 0x00000100,
1008         0x3c27c, 0xffffffff, 0x00000100,
1009         0x3c278, 0xffffffff, 0x00000100,
1010         0x3c210, 0xffffffff, 0x06000100,
1011         0x3c290, 0xffffffff, 0x00000100,
1012         0x3c274, 0xffffffff, 0x00000100,
1013         0x3c2b4, 0xffffffff, 0x00000100,
1014         0x3c2b0, 0xffffffff, 0x00000100,
1015         0x3c270, 0xffffffff, 0x00000100,
1016         0x30800, 0xffffffff, 0xe0000000,
1017         0x3c020, 0xffffffff, 0x00010000,
1018         0x3c024, 0xffffffff, 0x00030002,
1019         0x3c028, 0xffffffff, 0x00040007,
1020         0x3c02c, 0xffffffff, 0x00060005,
1021         0x3c030, 0xffffffff, 0x00090008,
1022         0x3c034, 0xffffffff, 0x00010000,
1023         0x3c038, 0xffffffff, 0x00030002,
1024         0x3c03c, 0xffffffff, 0x00040007,
1025         0x3c040, 0xffffffff, 0x00060005,
1026         0x3c044, 0xffffffff, 0x00090008,
1027         0x3c048, 0xffffffff, 0x00010000,
1028         0x3c04c, 0xffffffff, 0x00030002,
1029         0x3c050, 0xffffffff, 0x00040007,
1030         0x3c054, 0xffffffff, 0x00060005,
1031         0x3c058, 0xffffffff, 0x00090008,
1032         0x3c05c, 0xffffffff, 0x00010000,
1033         0x3c060, 0xffffffff, 0x00030002,
1034         0x3c064, 0xffffffff, 0x00040007,
1035         0x3c068, 0xffffffff, 0x00060005,
1036         0x3c06c, 0xffffffff, 0x00090008,
1037         0x3c070, 0xffffffff, 0x00010000,
1038         0x3c074, 0xffffffff, 0x00030002,
1039         0x3c078, 0xffffffff, 0x00040007,
1040         0x3c07c, 0xffffffff, 0x00060005,
1041         0x3c080, 0xffffffff, 0x00090008,
1042         0x3c084, 0xffffffff, 0x00010000,
1043         0x3c088, 0xffffffff, 0x00030002,
1044         0x3c08c, 0xffffffff, 0x00040007,
1045         0x3c090, 0xffffffff, 0x00060005,
1046         0x3c094, 0xffffffff, 0x00090008,
1047         0x3c098, 0xffffffff, 0x00010000,
1048         0x3c09c, 0xffffffff, 0x00030002,
1049         0x3c0a0, 0xffffffff, 0x00040007,
1050         0x3c0a4, 0xffffffff, 0x00060005,
1051         0x3c0a8, 0xffffffff, 0x00090008,
1052         0x3c000, 0xffffffff, 0x96e00200,
1053         0x8708, 0xffffffff, 0x00900100,
1054         0xc424, 0xffffffff, 0x0020003f,
1055         0x38, 0xffffffff, 0x0140001c,
1056         0x3c, 0x000f0000, 0x000f0000,
1057         0x220, 0xffffffff, 0xC060000C,
1058         0x224, 0xc0000fff, 0x00000100,
1059         0xf90, 0xffffffff, 0x00000100,
1060         0xf98, 0x00000101, 0x00000000,
1061         0x20a8, 0xffffffff, 0x00000104,
1062         0x55e4, 0xff000fff, 0x00000100,
1063         0x30cc, 0xc0000fff, 0x00000104,
1064         0xc1e4, 0x00000001, 0x00000001,
1065         0xd00c, 0xff000ff0, 0x00000100,
1066         0xd80c, 0xff000ff0, 0x00000100
1067 };
1068
1069 static const u32 spectre_golden_spm_registers[] =
1070 {
1071         0x30800, 0xe0ffffff, 0xe0000000
1072 };
1073
1074 static const u32 spectre_golden_common_registers[] =
1075 {
1076         0xc770, 0xffffffff, 0x00000800,
1077         0xc774, 0xffffffff, 0x00000800,
1078         0xc798, 0xffffffff, 0x00007fbf,
1079         0xc79c, 0xffffffff, 0x00007faf
1080 };
1081
1082 static const u32 spectre_golden_registers[] =
1083 {
1084         0x3c000, 0xffff1fff, 0x96940200,
1085         0x3c00c, 0xffff0001, 0xff000000,
1086         0x3c200, 0xfffc0fff, 0x00000100,
1087         0x6ed8, 0x00010101, 0x00010000,
1088         0x9834, 0xf00fffff, 0x00000400,
1089         0x9838, 0xfffffffc, 0x00020200,
1090         0x5bb0, 0x000000f0, 0x00000070,
1091         0x5bc0, 0xf0311fff, 0x80300000,
1092         0x98f8, 0x73773777, 0x12010001,
1093         0x9b7c, 0x00ff0000, 0x00fc0000,
1094         0x2f48, 0x73773777, 0x12010001,
1095         0x8a14, 0xf000003f, 0x00000007,
1096         0x8b24, 0xffffffff, 0x00ffffff,
1097         0x28350, 0x3f3f3fff, 0x00000082,
1098         0x28355, 0x0000003f, 0x00000000,
1099         0x3e78, 0x00000001, 0x00000002,
1100         0x913c, 0xffff03df, 0x00000004,
1101         0xc768, 0x00000008, 0x00000008,
1102         0x8c00, 0x000008ff, 0x00000800,
1103         0x9508, 0x00010000, 0x00010000,
1104         0xac0c, 0xffffffff, 0x54763210,
1105         0x214f8, 0x01ff01ff, 0x00000002,
1106         0x21498, 0x007ff800, 0x00200000,
1107         0x2015c, 0xffffffff, 0x00000f40,
1108         0x30934, 0xffffffff, 0x00000001
1109 };
1110
1111 static const u32 spectre_mgcg_cgcg_init[] =
1112 {
1113         0xc420, 0xffffffff, 0xfffffffc,
1114         0x30800, 0xffffffff, 0xe0000000,
1115         0x3c2a0, 0xffffffff, 0x00000100,
1116         0x3c208, 0xffffffff, 0x00000100,
1117         0x3c2c0, 0xffffffff, 0x00000100,
1118         0x3c2c8, 0xffffffff, 0x00000100,
1119         0x3c2c4, 0xffffffff, 0x00000100,
1120         0x55e4, 0xffffffff, 0x00600100,
1121         0x3c280, 0xffffffff, 0x00000100,
1122         0x3c214, 0xffffffff, 0x06000100,
1123         0x3c220, 0xffffffff, 0x00000100,
1124         0x3c218, 0xffffffff, 0x06000100,
1125         0x3c204, 0xffffffff, 0x00000100,
1126         0x3c2e0, 0xffffffff, 0x00000100,
1127         0x3c224, 0xffffffff, 0x00000100,
1128         0x3c200, 0xffffffff, 0x00000100,
1129         0x3c230, 0xffffffff, 0x00000100,
1130         0x3c234, 0xffffffff, 0x00000100,
1131         0x3c250, 0xffffffff, 0x00000100,
1132         0x3c254, 0xffffffff, 0x00000100,
1133         0x3c258, 0xffffffff, 0x00000100,
1134         0x3c25c, 0xffffffff, 0x00000100,
1135         0x3c260, 0xffffffff, 0x00000100,
1136         0x3c27c, 0xffffffff, 0x00000100,
1137         0x3c278, 0xffffffff, 0x00000100,
1138         0x3c210, 0xffffffff, 0x06000100,
1139         0x3c290, 0xffffffff, 0x00000100,
1140         0x3c274, 0xffffffff, 0x00000100,
1141         0x3c2b4, 0xffffffff, 0x00000100,
1142         0x3c2b0, 0xffffffff, 0x00000100,
1143         0x3c270, 0xffffffff, 0x00000100,
1144         0x30800, 0xffffffff, 0xe0000000,
1145         0x3c020, 0xffffffff, 0x00010000,
1146         0x3c024, 0xffffffff, 0x00030002,
1147         0x3c028, 0xffffffff, 0x00040007,
1148         0x3c02c, 0xffffffff, 0x00060005,
1149         0x3c030, 0xffffffff, 0x00090008,
1150         0x3c034, 0xffffffff, 0x00010000,
1151         0x3c038, 0xffffffff, 0x00030002,
1152         0x3c03c, 0xffffffff, 0x00040007,
1153         0x3c040, 0xffffffff, 0x00060005,
1154         0x3c044, 0xffffffff, 0x00090008,
1155         0x3c048, 0xffffffff, 0x00010000,
1156         0x3c04c, 0xffffffff, 0x00030002,
1157         0x3c050, 0xffffffff, 0x00040007,
1158         0x3c054, 0xffffffff, 0x00060005,
1159         0x3c058, 0xffffffff, 0x00090008,
1160         0x3c05c, 0xffffffff, 0x00010000,
1161         0x3c060, 0xffffffff, 0x00030002,
1162         0x3c064, 0xffffffff, 0x00040007,
1163         0x3c068, 0xffffffff, 0x00060005,
1164         0x3c06c, 0xffffffff, 0x00090008,
1165         0x3c070, 0xffffffff, 0x00010000,
1166         0x3c074, 0xffffffff, 0x00030002,
1167         0x3c078, 0xffffffff, 0x00040007,
1168         0x3c07c, 0xffffffff, 0x00060005,
1169         0x3c080, 0xffffffff, 0x00090008,
1170         0x3c084, 0xffffffff, 0x00010000,
1171         0x3c088, 0xffffffff, 0x00030002,
1172         0x3c08c, 0xffffffff, 0x00040007,
1173         0x3c090, 0xffffffff, 0x00060005,
1174         0x3c094, 0xffffffff, 0x00090008,
1175         0x3c098, 0xffffffff, 0x00010000,
1176         0x3c09c, 0xffffffff, 0x00030002,
1177         0x3c0a0, 0xffffffff, 0x00040007,
1178         0x3c0a4, 0xffffffff, 0x00060005,
1179         0x3c0a8, 0xffffffff, 0x00090008,
1180         0x3c0ac, 0xffffffff, 0x00010000,
1181         0x3c0b0, 0xffffffff, 0x00030002,
1182         0x3c0b4, 0xffffffff, 0x00040007,
1183         0x3c0b8, 0xffffffff, 0x00060005,
1184         0x3c0bc, 0xffffffff, 0x00090008,
1185         0x3c000, 0xffffffff, 0x96e00200,
1186         0x8708, 0xffffffff, 0x00900100,
1187         0xc424, 0xffffffff, 0x0020003f,
1188         0x38, 0xffffffff, 0x0140001c,
1189         0x3c, 0x000f0000, 0x000f0000,
1190         0x220, 0xffffffff, 0xC060000C,
1191         0x224, 0xc0000fff, 0x00000100,
1192         0xf90, 0xffffffff, 0x00000100,
1193         0xf98, 0x00000101, 0x00000000,
1194         0x20a8, 0xffffffff, 0x00000104,
1195         0x55e4, 0xff000fff, 0x00000100,
1196         0x30cc, 0xc0000fff, 0x00000104,
1197         0xc1e4, 0x00000001, 0x00000001,
1198         0xd00c, 0xff000ff0, 0x00000100,
1199         0xd80c, 0xff000ff0, 0x00000100
1200 };
1201
1202 static const u32 kalindi_golden_spm_registers[] =
1203 {
1204         0x30800, 0xe0ffffff, 0xe0000000
1205 };
1206
1207 static const u32 kalindi_golden_common_registers[] =
1208 {
1209         0xc770, 0xffffffff, 0x00000800,
1210         0xc774, 0xffffffff, 0x00000800,
1211         0xc798, 0xffffffff, 0x00007fbf,
1212         0xc79c, 0xffffffff, 0x00007faf
1213 };
1214
1215 static const u32 kalindi_golden_registers[] =
1216 {
1217         0x3c000, 0xffffdfff, 0x6e944040,
1218         0x55e4, 0xff607fff, 0xfc000100,
1219         0x3c220, 0xff000fff, 0x00000100,
1220         0x3c224, 0xff000fff, 0x00000100,
1221         0x3c200, 0xfffc0fff, 0x00000100,
1222         0x6ed8, 0x00010101, 0x00010000,
1223         0x9830, 0xffffffff, 0x00000000,
1224         0x9834, 0xf00fffff, 0x00000400,
1225         0x5bb0, 0x000000f0, 0x00000070,
1226         0x5bc0, 0xf0311fff, 0x80300000,
1227         0x98f8, 0x73773777, 0x12010001,
1228         0x98fc, 0xffffffff, 0x00000010,
1229         0x9b7c, 0x00ff0000, 0x00fc0000,
1230         0x8030, 0x00001f0f, 0x0000100a,
1231         0x2f48, 0x73773777, 0x12010001,
1232         0x2408, 0x000fffff, 0x000c007f,
1233         0x8a14, 0xf000003f, 0x00000007,
1234         0x8b24, 0x3fff3fff, 0x00ffcfff,
1235         0x30a04, 0x0000ff0f, 0x00000000,
1236         0x28a4c, 0x07ffffff, 0x06000000,
1237         0x4d8, 0x00000fff, 0x00000100,
1238         0x3e78, 0x00000001, 0x00000002,
1239         0xc768, 0x00000008, 0x00000008,
1240         0x8c00, 0x000000ff, 0x00000003,
1241         0x214f8, 0x01ff01ff, 0x00000002,
1242         0x21498, 0x007ff800, 0x00200000,
1243         0x2015c, 0xffffffff, 0x00000f40,
1244         0x88c4, 0x001f3ae3, 0x00000082,
1245         0x88d4, 0x0000001f, 0x00000010,
1246         0x30934, 0xffffffff, 0x00000000
1247 };
1248
1249 static const u32 kalindi_mgcg_cgcg_init[] =
1250 {
1251         0xc420, 0xffffffff, 0xfffffffc,
1252         0x30800, 0xffffffff, 0xe0000000,
1253         0x3c2a0, 0xffffffff, 0x00000100,
1254         0x3c208, 0xffffffff, 0x00000100,
1255         0x3c2c0, 0xffffffff, 0x00000100,
1256         0x3c2c8, 0xffffffff, 0x00000100,
1257         0x3c2c4, 0xffffffff, 0x00000100,
1258         0x55e4, 0xffffffff, 0x00600100,
1259         0x3c280, 0xffffffff, 0x00000100,
1260         0x3c214, 0xffffffff, 0x06000100,
1261         0x3c220, 0xffffffff, 0x00000100,
1262         0x3c218, 0xffffffff, 0x06000100,
1263         0x3c204, 0xffffffff, 0x00000100,
1264         0x3c2e0, 0xffffffff, 0x00000100,
1265         0x3c224, 0xffffffff, 0x00000100,
1266         0x3c200, 0xffffffff, 0x00000100,
1267         0x3c230, 0xffffffff, 0x00000100,
1268         0x3c234, 0xffffffff, 0x00000100,
1269         0x3c250, 0xffffffff, 0x00000100,
1270         0x3c254, 0xffffffff, 0x00000100,
1271         0x3c258, 0xffffffff, 0x00000100,
1272         0x3c25c, 0xffffffff, 0x00000100,
1273         0x3c260, 0xffffffff, 0x00000100,
1274         0x3c27c, 0xffffffff, 0x00000100,
1275         0x3c278, 0xffffffff, 0x00000100,
1276         0x3c210, 0xffffffff, 0x06000100,
1277         0x3c290, 0xffffffff, 0x00000100,
1278         0x3c274, 0xffffffff, 0x00000100,
1279         0x3c2b4, 0xffffffff, 0x00000100,
1280         0x3c2b0, 0xffffffff, 0x00000100,
1281         0x3c270, 0xffffffff, 0x00000100,
1282         0x30800, 0xffffffff, 0xe0000000,
1283         0x3c020, 0xffffffff, 0x00010000,
1284         0x3c024, 0xffffffff, 0x00030002,
1285         0x3c028, 0xffffffff, 0x00040007,
1286         0x3c02c, 0xffffffff, 0x00060005,
1287         0x3c030, 0xffffffff, 0x00090008,
1288         0x3c034, 0xffffffff, 0x00010000,
1289         0x3c038, 0xffffffff, 0x00030002,
1290         0x3c03c, 0xffffffff, 0x00040007,
1291         0x3c040, 0xffffffff, 0x00060005,
1292         0x3c044, 0xffffffff, 0x00090008,
1293         0x3c000, 0xffffffff, 0x96e00200,
1294         0x8708, 0xffffffff, 0x00900100,
1295         0xc424, 0xffffffff, 0x0020003f,
1296         0x38, 0xffffffff, 0x0140001c,
1297         0x3c, 0x000f0000, 0x000f0000,
1298         0x220, 0xffffffff, 0xC060000C,
1299         0x224, 0xc0000fff, 0x00000100,
1300         0x20a8, 0xffffffff, 0x00000104,
1301         0x55e4, 0xff000fff, 0x00000100,
1302         0x30cc, 0xc0000fff, 0x00000104,
1303         0xc1e4, 0x00000001, 0x00000001,
1304         0xd00c, 0xff000ff0, 0x00000100,
1305         0xd80c, 0xff000ff0, 0x00000100
1306 };
1307
1308 static const u32 hawaii_golden_spm_registers[] =
1309 {
1310         0x30800, 0xe0ffffff, 0xe0000000
1311 };
1312
1313 static const u32 hawaii_golden_common_registers[] =
1314 {
1315         0x30800, 0xffffffff, 0xe0000000,
1316         0x28350, 0xffffffff, 0x3a00161a,
1317         0x28354, 0xffffffff, 0x0000002e,
1318         0x9a10, 0xffffffff, 0x00018208,
1319         0x98f8, 0xffffffff, 0x12011003
1320 };
1321
1322 static const u32 hawaii_golden_registers[] =
1323 {
1324         0x3354, 0x00000333, 0x00000333,
1325         0x9a10, 0x00010000, 0x00058208,
1326         0x9830, 0xffffffff, 0x00000000,
1327         0x9834, 0xf00fffff, 0x00000400,
1328         0x9838, 0x0002021c, 0x00020200,
1329         0xc78, 0x00000080, 0x00000000,
1330         0x5bb0, 0x000000f0, 0x00000070,
1331         0x5bc0, 0xf0311fff, 0x80300000,
1332         0x350c, 0x00810000, 0x408af000,
1333         0x7030, 0x31000111, 0x00000011,
1334         0x2f48, 0x73773777, 0x12010001,
1335         0x2120, 0x0000007f, 0x0000001b,
1336         0x21dc, 0x00007fb6, 0x00002191,
1337         0x3628, 0x0000003f, 0x0000000a,
1338         0x362c, 0x0000003f, 0x0000000a,
1339         0x2ae4, 0x00073ffe, 0x000022a2,
1340         0x240c, 0x000007ff, 0x00000000,
1341         0x8bf0, 0x00002001, 0x00000001,
1342         0x8b24, 0xffffffff, 0x00ffffff,
1343         0x30a04, 0x0000ff0f, 0x00000000,
1344         0x28a4c, 0x07ffffff, 0x06000000,
1345         0x3e78, 0x00000001, 0x00000002,
1346         0xc768, 0x00000008, 0x00000008,
1347         0xc770, 0x00000f00, 0x00000800,
1348         0xc774, 0x00000f00, 0x00000800,
1349         0xc798, 0x00ffffff, 0x00ff7fbf,
1350         0xc79c, 0x00ffffff, 0x00ff7faf,
1351         0x8c00, 0x000000ff, 0x00000800,
1352         0xe40, 0x00001fff, 0x00001fff,
1353         0x9060, 0x0000007f, 0x00000020,
1354         0x9508, 0x00010000, 0x00010000,
1355         0xae00, 0x00100000, 0x000ff07c,
1356         0xac14, 0x000003ff, 0x0000000f,
1357         0xac10, 0xffffffff, 0x7564fdec,
1358         0xac0c, 0xffffffff, 0x3120b9a8,
1359         0xac08, 0x20000000, 0x0f9c0000
1360 };
1361
1362 static const u32 hawaii_mgcg_cgcg_init[] =
1363 {
1364         0xc420, 0xffffffff, 0xfffffffd,
1365         0x30800, 0xffffffff, 0xe0000000,
1366         0x3c2a0, 0xffffffff, 0x00000100,
1367         0x3c208, 0xffffffff, 0x00000100,
1368         0x3c2c0, 0xffffffff, 0x00000100,
1369         0x3c2c8, 0xffffffff, 0x00000100,
1370         0x3c2c4, 0xffffffff, 0x00000100,
1371         0x55e4, 0xffffffff, 0x00200100,
1372         0x3c280, 0xffffffff, 0x00000100,
1373         0x3c214, 0xffffffff, 0x06000100,
1374         0x3c220, 0xffffffff, 0x00000100,
1375         0x3c218, 0xffffffff, 0x06000100,
1376         0x3c204, 0xffffffff, 0x00000100,
1377         0x3c2e0, 0xffffffff, 0x00000100,
1378         0x3c224, 0xffffffff, 0x00000100,
1379         0x3c200, 0xffffffff, 0x00000100,
1380         0x3c230, 0xffffffff, 0x00000100,
1381         0x3c234, 0xffffffff, 0x00000100,
1382         0x3c250, 0xffffffff, 0x00000100,
1383         0x3c254, 0xffffffff, 0x00000100,
1384         0x3c258, 0xffffffff, 0x00000100,
1385         0x3c25c, 0xffffffff, 0x00000100,
1386         0x3c260, 0xffffffff, 0x00000100,
1387         0x3c27c, 0xffffffff, 0x00000100,
1388         0x3c278, 0xffffffff, 0x00000100,
1389         0x3c210, 0xffffffff, 0x06000100,
1390         0x3c290, 0xffffffff, 0x00000100,
1391         0x3c274, 0xffffffff, 0x00000100,
1392         0x3c2b4, 0xffffffff, 0x00000100,
1393         0x3c2b0, 0xffffffff, 0x00000100,
1394         0x3c270, 0xffffffff, 0x00000100,
1395         0x30800, 0xffffffff, 0xe0000000,
1396         0x3c020, 0xffffffff, 0x00010000,
1397         0x3c024, 0xffffffff, 0x00030002,
1398         0x3c028, 0xffffffff, 0x00040007,
1399         0x3c02c, 0xffffffff, 0x00060005,
1400         0x3c030, 0xffffffff, 0x00090008,
1401         0x3c034, 0xffffffff, 0x00010000,
1402         0x3c038, 0xffffffff, 0x00030002,
1403         0x3c03c, 0xffffffff, 0x00040007,
1404         0x3c040, 0xffffffff, 0x00060005,
1405         0x3c044, 0xffffffff, 0x00090008,
1406         0x3c048, 0xffffffff, 0x00010000,
1407         0x3c04c, 0xffffffff, 0x00030002,
1408         0x3c050, 0xffffffff, 0x00040007,
1409         0x3c054, 0xffffffff, 0x00060005,
1410         0x3c058, 0xffffffff, 0x00090008,
1411         0x3c05c, 0xffffffff, 0x00010000,
1412         0x3c060, 0xffffffff, 0x00030002,
1413         0x3c064, 0xffffffff, 0x00040007,
1414         0x3c068, 0xffffffff, 0x00060005,
1415         0x3c06c, 0xffffffff, 0x00090008,
1416         0x3c070, 0xffffffff, 0x00010000,
1417         0x3c074, 0xffffffff, 0x00030002,
1418         0x3c078, 0xffffffff, 0x00040007,
1419         0x3c07c, 0xffffffff, 0x00060005,
1420         0x3c080, 0xffffffff, 0x00090008,
1421         0x3c084, 0xffffffff, 0x00010000,
1422         0x3c088, 0xffffffff, 0x00030002,
1423         0x3c08c, 0xffffffff, 0x00040007,
1424         0x3c090, 0xffffffff, 0x00060005,
1425         0x3c094, 0xffffffff, 0x00090008,
1426         0x3c098, 0xffffffff, 0x00010000,
1427         0x3c09c, 0xffffffff, 0x00030002,
1428         0x3c0a0, 0xffffffff, 0x00040007,
1429         0x3c0a4, 0xffffffff, 0x00060005,
1430         0x3c0a8, 0xffffffff, 0x00090008,
1431         0x3c0ac, 0xffffffff, 0x00010000,
1432         0x3c0b0, 0xffffffff, 0x00030002,
1433         0x3c0b4, 0xffffffff, 0x00040007,
1434         0x3c0b8, 0xffffffff, 0x00060005,
1435         0x3c0bc, 0xffffffff, 0x00090008,
1436         0x3c0c0, 0xffffffff, 0x00010000,
1437         0x3c0c4, 0xffffffff, 0x00030002,
1438         0x3c0c8, 0xffffffff, 0x00040007,
1439         0x3c0cc, 0xffffffff, 0x00060005,
1440         0x3c0d0, 0xffffffff, 0x00090008,
1441         0x3c0d4, 0xffffffff, 0x00010000,
1442         0x3c0d8, 0xffffffff, 0x00030002,
1443         0x3c0dc, 0xffffffff, 0x00040007,
1444         0x3c0e0, 0xffffffff, 0x00060005,
1445         0x3c0e4, 0xffffffff, 0x00090008,
1446         0x3c0e8, 0xffffffff, 0x00010000,
1447         0x3c0ec, 0xffffffff, 0x00030002,
1448         0x3c0f0, 0xffffffff, 0x00040007,
1449         0x3c0f4, 0xffffffff, 0x00060005,
1450         0x3c0f8, 0xffffffff, 0x00090008,
1451         0xc318, 0xffffffff, 0x00020200,
1452         0x3350, 0xffffffff, 0x00000200,
1453         0x15c0, 0xffffffff, 0x00000400,
1454         0x55e8, 0xffffffff, 0x00000000,
1455         0x2f50, 0xffffffff, 0x00000902,
1456         0x3c000, 0xffffffff, 0x96940200,
1457         0x8708, 0xffffffff, 0x00900100,
1458         0xc424, 0xffffffff, 0x0020003f,
1459         0x38, 0xffffffff, 0x0140001c,
1460         0x3c, 0x000f0000, 0x000f0000,
1461         0x220, 0xffffffff, 0xc060000c,
1462         0x224, 0xc0000fff, 0x00000100,
1463         0xf90, 0xffffffff, 0x00000100,
1464         0xf98, 0x00000101, 0x00000000,
1465         0x20a8, 0xffffffff, 0x00000104,
1466         0x55e4, 0xff000fff, 0x00000100,
1467         0x30cc, 0xc0000fff, 0x00000104,
1468         0xc1e4, 0x00000001, 0x00000001,
1469         0xd00c, 0xff000ff0, 0x00000100,
1470         0xd80c, 0xff000ff0, 0x00000100
1471 };
1472
1473 static void cik_init_golden_registers(struct radeon_device *rdev)
1474 {
1475         switch (rdev->family) {
1476         case CHIP_BONAIRE:
1477                 radeon_program_register_sequence(rdev,
1478                                                  bonaire_mgcg_cgcg_init,
1479                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1480                 radeon_program_register_sequence(rdev,
1481                                                  bonaire_golden_registers,
1482                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1483                 radeon_program_register_sequence(rdev,
1484                                                  bonaire_golden_common_registers,
1485                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1486                 radeon_program_register_sequence(rdev,
1487                                                  bonaire_golden_spm_registers,
1488                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1489                 break;
1490         case CHIP_KABINI:
1491                 radeon_program_register_sequence(rdev,
1492                                                  kalindi_mgcg_cgcg_init,
1493                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1494                 radeon_program_register_sequence(rdev,
1495                                                  kalindi_golden_registers,
1496                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1497                 radeon_program_register_sequence(rdev,
1498                                                  kalindi_golden_common_registers,
1499                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1500                 radeon_program_register_sequence(rdev,
1501                                                  kalindi_golden_spm_registers,
1502                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1503                 break;
1504         case CHIP_KAVERI:
1505                 radeon_program_register_sequence(rdev,
1506                                                  spectre_mgcg_cgcg_init,
1507                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1508                 radeon_program_register_sequence(rdev,
1509                                                  spectre_golden_registers,
1510                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1511                 radeon_program_register_sequence(rdev,
1512                                                  spectre_golden_common_registers,
1513                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1514                 radeon_program_register_sequence(rdev,
1515                                                  spectre_golden_spm_registers,
1516                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1517                 break;
1518         case CHIP_HAWAII:
1519                 radeon_program_register_sequence(rdev,
1520                                                  hawaii_mgcg_cgcg_init,
1521                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1522                 radeon_program_register_sequence(rdev,
1523                                                  hawaii_golden_registers,
1524                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1525                 radeon_program_register_sequence(rdev,
1526                                                  hawaii_golden_common_registers,
1527                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1528                 radeon_program_register_sequence(rdev,
1529                                                  hawaii_golden_spm_registers,
1530                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1531                 break;
1532         default:
1533                 break;
1534         }
1535 }
1536
1537 /**
1538  * cik_get_xclk - get the xclk
1539  *
1540  * @rdev: radeon_device pointer
1541  *
1542  * Returns the reference clock used by the gfx engine
1543  * (CIK).
1544  */
1545 u32 cik_get_xclk(struct radeon_device *rdev)
1546 {
1547         u32 reference_clock = rdev->clock.spll.reference_freq;
1548
1549         if (rdev->flags & RADEON_IS_IGP) {
1550                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1551                         return reference_clock / 2;
1552         } else {
1553                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1554                         return reference_clock / 4;
1555         }
1556         return reference_clock;
1557 }
1558
1559 /**
1560  * cik_mm_rdoorbell - read a doorbell dword
1561  *
1562  * @rdev: radeon_device pointer
1563  * @index: doorbell index
1564  *
1565  * Returns the value in the doorbell aperture at the
1566  * requested doorbell index (CIK).
1567  */
1568 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1569 {
1570         if (index < rdev->doorbell.num_doorbells) {
1571                 return readl(rdev->doorbell.ptr + index);
1572         } else {
1573                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1574                 return 0;
1575         }
1576 }
1577
1578 /**
1579  * cik_mm_wdoorbell - write a doorbell dword
1580  *
1581  * @rdev: radeon_device pointer
1582  * @index: doorbell index
1583  * @v: value to write
1584  *
1585  * Writes @v to the doorbell aperture at the
1586  * requested doorbell index (CIK).
1587  */
1588 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1589 {
1590         if (index < rdev->doorbell.num_doorbells) {
1591                 writel(v, rdev->doorbell.ptr + index);
1592         } else {
1593                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1594         }
1595 }
1596
1597 #define BONAIRE_IO_MC_REGS_SIZE 36
1598
1599 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1600 {
1601         {0x00000070, 0x04400000},
1602         {0x00000071, 0x80c01803},
1603         {0x00000072, 0x00004004},
1604         {0x00000073, 0x00000100},
1605         {0x00000074, 0x00ff0000},
1606         {0x00000075, 0x34000000},
1607         {0x00000076, 0x08000014},
1608         {0x00000077, 0x00cc08ec},
1609         {0x00000078, 0x00000400},
1610         {0x00000079, 0x00000000},
1611         {0x0000007a, 0x04090000},
1612         {0x0000007c, 0x00000000},
1613         {0x0000007e, 0x4408a8e8},
1614         {0x0000007f, 0x00000304},
1615         {0x00000080, 0x00000000},
1616         {0x00000082, 0x00000001},
1617         {0x00000083, 0x00000002},
1618         {0x00000084, 0xf3e4f400},
1619         {0x00000085, 0x052024e3},
1620         {0x00000087, 0x00000000},
1621         {0x00000088, 0x01000000},
1622         {0x0000008a, 0x1c0a0000},
1623         {0x0000008b, 0xff010000},
1624         {0x0000008d, 0xffffefff},
1625         {0x0000008e, 0xfff3efff},
1626         {0x0000008f, 0xfff3efbf},
1627         {0x00000092, 0xf7ffffff},
1628         {0x00000093, 0xffffff7f},
1629         {0x00000095, 0x00101101},
1630         {0x00000096, 0x00000fff},
1631         {0x00000097, 0x00116fff},
1632         {0x00000098, 0x60010000},
1633         {0x00000099, 0x10010000},
1634         {0x0000009a, 0x00006000},
1635         {0x0000009b, 0x00001000},
1636         {0x0000009f, 0x00b48000}
1637 };
1638
1639 #define HAWAII_IO_MC_REGS_SIZE 22
1640
1641 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1642 {
1643         {0x0000007d, 0x40000000},
1644         {0x0000007e, 0x40180304},
1645         {0x0000007f, 0x0000ff00},
1646         {0x00000081, 0x00000000},
1647         {0x00000083, 0x00000800},
1648         {0x00000086, 0x00000000},
1649         {0x00000087, 0x00000100},
1650         {0x00000088, 0x00020100},
1651         {0x00000089, 0x00000000},
1652         {0x0000008b, 0x00040000},
1653         {0x0000008c, 0x00000100},
1654         {0x0000008e, 0xff010000},
1655         {0x00000090, 0xffffefff},
1656         {0x00000091, 0xfff3efff},
1657         {0x00000092, 0xfff3efbf},
1658         {0x00000093, 0xf7ffffff},
1659         {0x00000094, 0xffffff7f},
1660         {0x00000095, 0x00000fff},
1661         {0x00000096, 0x00116fff},
1662         {0x00000097, 0x60010000},
1663         {0x00000098, 0x10010000},
1664         {0x0000009f, 0x00c79000}
1665 };
1666
1667
1668 /**
1669  * cik_srbm_select - select specific register instances
1670  *
1671  * @rdev: radeon_device pointer
1672  * @me: selected ME (micro engine)
1673  * @pipe: pipe
1674  * @queue: queue
1675  * @vmid: VMID
1676  *
1677  * Switches the currently active registers instances.  Some
1678  * registers are instanced per VMID, others are instanced per
1679  * me/pipe/queue combination.
1680  */
1681 static void cik_srbm_select(struct radeon_device *rdev,
1682                             u32 me, u32 pipe, u32 queue, u32 vmid)
1683 {
1684         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1685                              MEID(me & 0x3) |
1686                              VMID(vmid & 0xf) |
1687                              QUEUEID(queue & 0x7));
1688         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1689 }
1690
1691 /* ucode loading */
1692 /**
1693  * ci_mc_load_microcode - load MC ucode into the hw
1694  *
1695  * @rdev: radeon_device pointer
1696  *
1697  * Load the GDDR MC ucode into the hw (CIK).
1698  * Returns 0 on success, error on failure.
1699  */
1700 static int ci_mc_load_microcode(struct radeon_device *rdev)
1701 {
1702         const __be32 *fw_data;
1703         u32 running, blackout = 0;
1704         u32 *io_mc_regs;
1705         int i, ucode_size, regs_size;
1706
1707         if (!rdev->mc_fw)
1708                 return -EINVAL;
1709
1710         switch (rdev->family) {
1711         case CHIP_BONAIRE:
1712                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1713                 ucode_size = CIK_MC_UCODE_SIZE;
1714                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1715                 break;
1716         case CHIP_HAWAII:
1717                 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1718                 ucode_size = HAWAII_MC_UCODE_SIZE;
1719                 regs_size = HAWAII_IO_MC_REGS_SIZE;
1720                 break;
1721         default:
1722                 return -EINVAL;
1723         }
1724
1725         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1726
1727         if (running == 0) {
1728                 if (running) {
1729                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1730                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1731                 }
1732
1733                 /* reset the engine and set to writable */
1734                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1735                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1736
1737                 /* load mc io regs */
1738                 for (i = 0; i < regs_size; i++) {
1739                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1740                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1741                 }
1742                 /* load the MC ucode */
1743                 fw_data = (const __be32 *)rdev->mc_fw->data;
1744                 for (i = 0; i < ucode_size; i++)
1745                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1746
1747                 /* put the engine back into the active state */
1748                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1749                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1750                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1751
1752                 /* wait for training to complete */
1753                 for (i = 0; i < rdev->usec_timeout; i++) {
1754                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1755                                 break;
1756                         udelay(1);
1757                 }
1758                 for (i = 0; i < rdev->usec_timeout; i++) {
1759                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1760                                 break;
1761                         udelay(1);
1762                 }
1763
1764                 if (running)
1765                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1766         }
1767
1768         return 0;
1769 }
1770
1771 /**
1772  * cik_init_microcode - load ucode images from disk
1773  *
1774  * @rdev: radeon_device pointer
1775  *
1776  * Use the firmware interface to load the ucode images into
1777  * the driver (not loaded into hw).
1778  * Returns 0 on success, error on failure.
1779  */
1780 static int cik_init_microcode(struct radeon_device *rdev)
1781 {
1782         const char *chip_name;
1783         size_t pfp_req_size, me_req_size, ce_req_size,
1784                 mec_req_size, rlc_req_size, mc_req_size = 0,
1785                 sdma_req_size, smc_req_size = 0;
1786         char fw_name[30];
1787         int err;
1788
1789         DRM_DEBUG("\n");
1790
1791         switch (rdev->family) {
1792         case CHIP_BONAIRE:
1793                 chip_name = "BONAIRE";
1794                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1795                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1796                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1797                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1798                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1799                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1800                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1801                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1802                 break;
1803         case CHIP_HAWAII:
1804                 chip_name = "HAWAII";
1805                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1806                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1807                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1808                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1809                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1810                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1811                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1812                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1813                 break;
1814         case CHIP_KAVERI:
1815                 chip_name = "KAVERI";
1816                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1817                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1818                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1819                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1820                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1821                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1822                 break;
1823         case CHIP_KABINI:
1824                 chip_name = "KABINI";
1825                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1826                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1827                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1828                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1829                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1830                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1831                 break;
1832         default: BUG();
1833         }
1834
1835         DRM_INFO("Loading %s Microcode\n", chip_name);
1836
1837         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1838         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1839         if (err)
1840                 goto out;
1841         if (rdev->pfp_fw->size != pfp_req_size) {
1842                 printk(KERN_ERR
1843                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1844                        rdev->pfp_fw->size, fw_name);
1845                 err = -EINVAL;
1846                 goto out;
1847         }
1848
1849         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1850         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1851         if (err)
1852                 goto out;
1853         if (rdev->me_fw->size != me_req_size) {
1854                 printk(KERN_ERR
1855                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1856                        rdev->me_fw->size, fw_name);
1857                 err = -EINVAL;
1858         }
1859
1860         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1861         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1862         if (err)
1863                 goto out;
1864         if (rdev->ce_fw->size != ce_req_size) {
1865                 printk(KERN_ERR
1866                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1867                        rdev->ce_fw->size, fw_name);
1868                 err = -EINVAL;
1869         }
1870
1871         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1872         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1873         if (err)
1874                 goto out;
1875         if (rdev->mec_fw->size != mec_req_size) {
1876                 printk(KERN_ERR
1877                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1878                        rdev->mec_fw->size, fw_name);
1879                 err = -EINVAL;
1880         }
1881
1882         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1883         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1884         if (err)
1885                 goto out;
1886         if (rdev->rlc_fw->size != rlc_req_size) {
1887                 printk(KERN_ERR
1888                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1889                        rdev->rlc_fw->size, fw_name);
1890                 err = -EINVAL;
1891         }
1892
1893         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1894         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1895         if (err)
1896                 goto out;
1897         if (rdev->sdma_fw->size != sdma_req_size) {
1898                 printk(KERN_ERR
1899                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1900                        rdev->sdma_fw->size, fw_name);
1901                 err = -EINVAL;
1902         }
1903
1904         /* No SMC, MC ucode on APUs */
1905         if (!(rdev->flags & RADEON_IS_IGP)) {
1906                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1907                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1908                 if (err)
1909                         goto out;
1910                 if (rdev->mc_fw->size != mc_req_size) {
1911                         printk(KERN_ERR
1912                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1913                                rdev->mc_fw->size, fw_name);
1914                         err = -EINVAL;
1915                 }
1916
1917                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1918                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1919                 if (err) {
1920                         printk(KERN_ERR
1921                                "smc: error loading firmware \"%s\"\n",
1922                                fw_name);
1923                         release_firmware(rdev->smc_fw);
1924                         rdev->smc_fw = NULL;
1925                         err = 0;
1926                 } else if (rdev->smc_fw->size != smc_req_size) {
1927                         printk(KERN_ERR
1928                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1929                                rdev->smc_fw->size, fw_name);
1930                         err = -EINVAL;
1931                 }
1932         }
1933
1934 out:
1935         if (err) {
1936                 if (err != -EINVAL)
1937                         printk(KERN_ERR
1938                                "cik_cp: Failed to load firmware \"%s\"\n",
1939                                fw_name);
1940                 release_firmware(rdev->pfp_fw);
1941                 rdev->pfp_fw = NULL;
1942                 release_firmware(rdev->me_fw);
1943                 rdev->me_fw = NULL;
1944                 release_firmware(rdev->ce_fw);
1945                 rdev->ce_fw = NULL;
1946                 release_firmware(rdev->rlc_fw);
1947                 rdev->rlc_fw = NULL;
1948                 release_firmware(rdev->mc_fw);
1949                 rdev->mc_fw = NULL;
1950                 release_firmware(rdev->smc_fw);
1951                 rdev->smc_fw = NULL;
1952         }
1953         return err;
1954 }
1955
1956 /*
1957  * Core functions
1958  */
1959 /**
1960  * cik_tiling_mode_table_init - init the hw tiling table
1961  *
1962  * @rdev: radeon_device pointer
1963  *
1964  * Starting with SI, the tiling setup is done globally in a
1965  * set of 32 tiling modes.  Rather than selecting each set of
1966  * parameters per surface as on older asics, we just select
1967  * which index in the tiling table we want to use, and the
1968  * surface uses those parameters (CIK).
1969  */
1970 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1971 {
1972         const u32 num_tile_mode_states = 32;
1973         const u32 num_secondary_tile_mode_states = 16;
1974         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1975         u32 num_pipe_configs;
1976         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1977                 rdev->config.cik.max_shader_engines;
1978
1979         switch (rdev->config.cik.mem_row_size_in_kb) {
1980         case 1:
1981                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1982                 break;
1983         case 2:
1984         default:
1985                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1986                 break;
1987         case 4:
1988                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1989                 break;
1990         }
1991
1992         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1993         if (num_pipe_configs > 8)
1994                 num_pipe_configs = 16;
1995
1996         if (num_pipe_configs == 16) {
1997                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1998                         switch (reg_offset) {
1999                         case 0:
2000                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2003                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2004                                 break;
2005                         case 1:
2006                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2009                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2010                                 break;
2011                         case 2:
2012                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2015                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2016                                 break;
2017                         case 3:
2018                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2020                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2021                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2022                                 break;
2023                         case 4:
2024                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2025                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2027                                                  TILE_SPLIT(split_equal_to_row_size));
2028                                 break;
2029                         case 5:
2030                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2031                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2032                                 break;
2033                         case 6:
2034                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2035                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2036                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2037                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2038                                 break;
2039                         case 7:
2040                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2041                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2042                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2043                                                  TILE_SPLIT(split_equal_to_row_size));
2044                                 break;
2045                         case 8:
2046                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2047                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2048                                 break;
2049                         case 9:
2050                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2052                                 break;
2053                         case 10:
2054                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2055                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2057                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058                                 break;
2059                         case 11:
2060                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2062                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2063                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2064                                 break;
2065                         case 12:
2066                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2067                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2068                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2069                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2070                                 break;
2071                         case 13:
2072                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2073                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2074                                 break;
2075                         case 14:
2076                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2077                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080                                 break;
2081                         case 16:
2082                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2083                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2084                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2085                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086                                 break;
2087                         case 17:
2088                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2091                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092                                 break;
2093                         case 27:
2094                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2096                                 break;
2097                         case 28:
2098                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2101                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102                                 break;
2103                         case 29:
2104                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2105                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2106                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2107                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2108                                 break;
2109                         case 30:
2110                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2111                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2112                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2113                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2114                                 break;
2115                         default:
2116                                 gb_tile_moden = 0;
2117                                 break;
2118                         }
2119                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2120                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2121                 }
2122                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2123                         switch (reg_offset) {
2124                         case 0:
2125                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2127                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2128                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2129                                 break;
2130                         case 1:
2131                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2132                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2133                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2134                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2135                                 break;
2136                         case 2:
2137                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2138                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2139                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2140                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2141                                 break;
2142                         case 3:
2143                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2144                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2145                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2146                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2147                                 break;
2148                         case 4:
2149                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2151                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2152                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2153                                 break;
2154                         case 5:
2155                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2157                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2158                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2159                                 break;
2160                         case 6:
2161                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2165                                 break;
2166                         case 8:
2167                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2171                                 break;
2172                         case 9:
2173                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2177                                 break;
2178                         case 10:
2179                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2182                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2183                                 break;
2184                         case 11:
2185                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2189                                 break;
2190                         case 12:
2191                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2194                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2195                                 break;
2196                         case 13:
2197                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2200                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2201                                 break;
2202                         case 14:
2203                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2207                                 break;
2208                         default:
2209                                 gb_tile_moden = 0;
2210                                 break;
2211                         }
2212                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2213                 }
2214         } else if (num_pipe_configs == 8) {
2215                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2216                         switch (reg_offset) {
2217                         case 0:
2218                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2220                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2221                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2222                                 break;
2223                         case 1:
2224                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2226                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2227                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2228                                 break;
2229                         case 2:
2230                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2232                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2233                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2234                                 break;
2235                         case 3:
2236                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2238                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2239                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2240                                 break;
2241                         case 4:
2242                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2244                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2245                                                  TILE_SPLIT(split_equal_to_row_size));
2246                                 break;
2247                         case 5:
2248                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250                                 break;
2251                         case 6:
2252                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2253                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2254                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2255                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2256                                 break;
2257                         case 7:
2258                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2259                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2260                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2261                                                  TILE_SPLIT(split_equal_to_row_size));
2262                                 break;
2263                         case 8:
2264                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2265                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2266                                 break;
2267                         case 9:
2268                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2270                                 break;
2271                         case 10:
2272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2274                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2275                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276                                 break;
2277                         case 11:
2278                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2280                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2281                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282                                 break;
2283                         case 12:
2284                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2285                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2287                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288                                 break;
2289                         case 13:
2290                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2292                                 break;
2293                         case 14:
2294                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2296                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2297                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298                                 break;
2299                         case 16:
2300                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2301                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2303                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304                                 break;
2305                         case 17:
2306                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2307                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310                                 break;
2311                         case 27:
2312                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2314                                 break;
2315                         case 28:
2316                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2319                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320                                 break;
2321                         case 29:
2322                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326                                 break;
2327                         case 30:
2328                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2329                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2330                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332                                 break;
2333                         default:
2334                                 gb_tile_moden = 0;
2335                                 break;
2336                         }
2337                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2338                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2339                 }
2340                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2341                         switch (reg_offset) {
2342                         case 0:
2343                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2346                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2347                                 break;
2348                         case 1:
2349                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2353                                 break;
2354                         case 2:
2355                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2359                                 break;
2360                         case 3:
2361                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2365                                 break;
2366                         case 4:
2367                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2371                                 break;
2372                         case 5:
2373                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2375                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2376                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2377                                 break;
2378                         case 6:
2379                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2383                                 break;
2384                         case 8:
2385                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2386                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2387                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2389                                 break;
2390                         case 9:
2391                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2395                                 break;
2396                         case 10:
2397                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2399                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2401                                 break;
2402                         case 11:
2403                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2406                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2407                                 break;
2408                         case 12:
2409                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2411                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2412                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2413                                 break;
2414                         case 13:
2415                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2417                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2418                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2419                                 break;
2420                         case 14:
2421                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2425                                 break;
2426                         default:
2427                                 gb_tile_moden = 0;
2428                                 break;
2429                         }
2430                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2431                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2432                 }
2433         } else if (num_pipe_configs == 4) {
2434                 if (num_rbs == 4) {
2435                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2436                                 switch (reg_offset) {
2437                                 case 0:
2438                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2440                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2442                                         break;
2443                                 case 1:
2444                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2448                                         break;
2449                                 case 2:
2450                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2452                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2454                                         break;
2455                                 case 3:
2456                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2458                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2459                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2460                                         break;
2461                                 case 4:
2462                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2464                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2465                                                          TILE_SPLIT(split_equal_to_row_size));
2466                                         break;
2467                                 case 5:
2468                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2469                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470                                         break;
2471                                 case 6:
2472                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2473                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2474                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2475                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2476                                         break;
2477                                 case 7:
2478                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2479                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2480                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481                                                          TILE_SPLIT(split_equal_to_row_size));
2482                                         break;
2483                                 case 8:
2484                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2485                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2486                                         break;
2487                                 case 9:
2488                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2489                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2490                                         break;
2491                                 case 10:
2492                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496                                         break;
2497                                 case 11:
2498                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2500                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2501                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502                                         break;
2503                                 case 12:
2504                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2505                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2506                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2507                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2508                                         break;
2509                                 case 13:
2510                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2511                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2512                                         break;
2513                                 case 14:
2514                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2516                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2518                                         break;
2519                                 case 16:
2520                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2521                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2522                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2523                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524                                         break;
2525                                 case 17:
2526                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2527                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2529                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530                                         break;
2531                                 case 27:
2532                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2534                                         break;
2535                                 case 28:
2536                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2538                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540                                         break;
2541                                 case 29:
2542                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546                                         break;
2547                                 case 30:
2548                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2549                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2550                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552                                         break;
2553                                 default:
2554                                         gb_tile_moden = 0;
2555                                         break;
2556                                 }
2557                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2558                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2559                         }
2560                 } else if (num_rbs < 4) {
2561                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2562                                 switch (reg_offset) {
2563                                 case 0:
2564                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2568                                         break;
2569                                 case 1:
2570                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2572                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2573                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2574                                         break;
2575                                 case 2:
2576                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2579                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2580                                         break;
2581                                 case 3:
2582                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2584                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2585                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2586                                         break;
2587                                 case 4:
2588                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2591                                                          TILE_SPLIT(split_equal_to_row_size));
2592                                         break;
2593                                 case 5:
2594                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2595                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2596                                         break;
2597                                 case 6:
2598                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2600                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2601                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2602                                         break;
2603                                 case 7:
2604                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2605                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2606                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607                                                          TILE_SPLIT(split_equal_to_row_size));
2608                                         break;
2609                                 case 8:
2610                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2611                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2612                                         break;
2613                                 case 9:
2614                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2615                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2616                                         break;
2617                                 case 10:
2618                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622                                         break;
2623                                 case 11:
2624                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628                                         break;
2629                                 case 12:
2630                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2633                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634                                         break;
2635                                 case 13:
2636                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2638                                         break;
2639                                 case 14:
2640                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2642                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2643                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644                                         break;
2645                                 case 16:
2646                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                                         break;
2651                                 case 17:
2652                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2654                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656                                         break;
2657                                 case 27:
2658                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2659                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2660                                         break;
2661                                 case 28:
2662                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2663                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2664                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666                                         break;
2667                                 case 29:
2668                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2669                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2670                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672                                         break;
2673                                 case 30:
2674                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2675                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678                                         break;
2679                                 default:
2680                                         gb_tile_moden = 0;
2681                                         break;
2682                                 }
2683                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2684                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2685                         }
2686                 }
2687                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2688                         switch (reg_offset) {
2689                         case 0:
2690                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2694                                 break;
2695                         case 1:
2696                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2700                                 break;
2701                         case 2:
2702                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2705                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2706                                 break;
2707                         case 3:
2708                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2711                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2712                                 break;
2713                         case 4:
2714                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2717                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2718                                 break;
2719                         case 5:
2720                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2723                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2724                                 break;
2725                         case 6:
2726                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2729                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2730                                 break;
2731                         case 8:
2732                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2733                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2734                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2736                                 break;
2737                         case 9:
2738                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2739                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2742                                 break;
2743                         case 10:
2744                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2747                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2748                                 break;
2749                         case 11:
2750                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2752                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2754                                 break;
2755                         case 12:
2756                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2759                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2760                                 break;
2761                         case 13:
2762                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2766                                 break;
2767                         case 14:
2768                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2771                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2772                                 break;
2773                         default:
2774                                 gb_tile_moden = 0;
2775                                 break;
2776                         }
2777                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2778                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2779                 }
2780         } else if (num_pipe_configs == 2) {
2781                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2782                         switch (reg_offset) {
2783                         case 0:
2784                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2787                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2788                                 break;
2789                         case 1:
2790                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2792                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2793                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2794                                 break;
2795                         case 2:
2796                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2799                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2800                                 break;
2801                         case 3:
2802                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2805                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2806                                 break;
2807                         case 4:
2808                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2811                                                  TILE_SPLIT(split_equal_to_row_size));
2812                                 break;
2813                         case 5:
2814                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2816                                 break;
2817                         case 6:
2818                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2820                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2821                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2822                                 break;
2823                         case 7:
2824                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2825                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2826                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2827                                                  TILE_SPLIT(split_equal_to_row_size));
2828                                 break;
2829                         case 8:
2830                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2831                                 break;
2832                         case 9:
2833                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2835                                 break;
2836                         case 10:
2837                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2839                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2840                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2841                                 break;
2842                         case 11:
2843                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2844                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2845                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2846                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847                                 break;
2848                         case 12:
2849                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2850                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2851                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2852                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853                                 break;
2854                         case 13:
2855                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2856                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2857                                 break;
2858                         case 14:
2859                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2861                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2862                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863                                 break;
2864                         case 16:
2865                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2866                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2868                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869                                 break;
2870                         case 17:
2871                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2872                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2873                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2874                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875                                 break;
2876                         case 27:
2877                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2878                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2879                                 break;
2880                         case 28:
2881                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2882                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2883                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2884                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885                                 break;
2886                         case 29:
2887                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2890                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891                                 break;
2892                         case 30:
2893                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2894                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2896                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2897                                 break;
2898                         default:
2899                                 gb_tile_moden = 0;
2900                                 break;
2901                         }
2902                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2903                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2904                 }
2905                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2906                         switch (reg_offset) {
2907                         case 0:
2908                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2909                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2910                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2912                                 break;
2913                         case 1:
2914                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2915                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2916                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2918                                 break;
2919                         case 2:
2920                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2924                                 break;
2925                         case 3:
2926                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2928                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2930                                 break;
2931                         case 4:
2932                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2934                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2936                                 break;
2937                         case 5:
2938                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2941                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2942                                 break;
2943                         case 6:
2944                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2946                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2947                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2948                                 break;
2949                         case 8:
2950                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2951                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2952                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2954                                 break;
2955                         case 9:
2956                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2957                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2960                                 break;
2961                         case 10:
2962                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2963                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2966                                 break;
2967                         case 11:
2968                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2969                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2972                                 break;
2973                         case 12:
2974                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2976                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2977                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2978                                 break;
2979                         case 13:
2980                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2984                                 break;
2985                         case 14:
2986                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2988                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2989                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2990                                 break;
2991                         default:
2992                                 gb_tile_moden = 0;
2993                                 break;
2994                         }
2995                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2996                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2997                 }
2998         } else
2999                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3000 }
3001
3002 /**
3003  * cik_select_se_sh - select which SE, SH to address
3004  *
3005  * @rdev: radeon_device pointer
3006  * @se_num: shader engine to address
3007  * @sh_num: sh block to address
3008  *
3009  * Select which SE, SH combinations to address. Certain
3010  * registers are instanced per SE or SH.  0xffffffff means
3011  * broadcast to all SEs or SHs (CIK).
3012  */
3013 static void cik_select_se_sh(struct radeon_device *rdev,
3014                              u32 se_num, u32 sh_num)
3015 {
3016         u32 data = INSTANCE_BROADCAST_WRITES;
3017
3018         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3019                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3020         else if (se_num == 0xffffffff)
3021                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3022         else if (sh_num == 0xffffffff)
3023                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3024         else
3025                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3026         WREG32(GRBM_GFX_INDEX, data);
3027 }
3028
3029 /**
3030  * cik_create_bitmask - create a bitmask
3031  *
3032  * @bit_width: length of the mask
3033  *
3034  * create a variable length bit mask (CIK).
3035  * Returns the bitmask.
3036  */
3037 static u32 cik_create_bitmask(u32 bit_width)
3038 {
3039         u32 i, mask = 0;
3040
3041         for (i = 0; i < bit_width; i++) {
3042                 mask <<= 1;
3043                 mask |= 1;
3044         }
3045         return mask;
3046 }
3047
3048 /**
3049  * cik_select_se_sh - select which SE, SH to address
3050  *
3051  * @rdev: radeon_device pointer
3052  * @max_rb_num: max RBs (render backends) for the asic
3053  * @se_num: number of SEs (shader engines) for the asic
3054  * @sh_per_se: number of SH blocks per SE for the asic
3055  *
3056  * Calculates the bitmask of disabled RBs (CIK).
3057  * Returns the disabled RB bitmask.
3058  */
3059 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3060                               u32 max_rb_num, u32 se_num,
3061                               u32 sh_per_se)
3062 {
3063         u32 data, mask;
3064
3065         data = RREG32(CC_RB_BACKEND_DISABLE);
3066         if (data & 1)
3067                 data &= BACKEND_DISABLE_MASK;
3068         else
3069                 data = 0;
3070         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3071
3072         data >>= BACKEND_DISABLE_SHIFT;
3073
3074         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
3075
3076         return data & mask;
3077 }
3078
3079 /**
3080  * cik_setup_rb - setup the RBs on the asic
3081  *
3082  * @rdev: radeon_device pointer
3083  * @se_num: number of SEs (shader engines) for the asic
3084  * @sh_per_se: number of SH blocks per SE for the asic
3085  * @max_rb_num: max RBs (render backends) for the asic
3086  *
3087  * Configures per-SE/SH RB registers (CIK).
3088  */
3089 static void cik_setup_rb(struct radeon_device *rdev,
3090                          u32 se_num, u32 sh_per_se,
3091                          u32 max_rb_num)
3092 {
3093         int i, j;
3094         u32 data, mask;
3095         u32 disabled_rbs = 0;
3096         u32 enabled_rbs = 0;
3097
3098         for (i = 0; i < se_num; i++) {
3099                 for (j = 0; j < sh_per_se; j++) {
3100                         cik_select_se_sh(rdev, i, j);
3101                         data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
3102                         if (rdev->family == CHIP_HAWAII)
3103                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3104                         else
3105                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3106                 }
3107         }
3108         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3109
3110         mask = 1;
3111         for (i = 0; i < max_rb_num; i++) {
3112                 if (!(disabled_rbs & mask))
3113                         enabled_rbs |= mask;
3114                 mask <<= 1;
3115         }
3116
3117         for (i = 0; i < se_num; i++) {
3118                 cik_select_se_sh(rdev, i, 0xffffffff);
3119                 data = 0;
3120                 for (j = 0; j < sh_per_se; j++) {
3121                         switch (enabled_rbs & 3) {
3122                         case 0:
3123                                 if (j == 0)
3124                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3125                                 else
3126                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3127                                 break;
3128                         case 1:
3129                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3130                                 break;
3131                         case 2:
3132                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3133                                 break;
3134                         case 3:
3135                         default:
3136                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3137                                 break;
3138                         }
3139                         enabled_rbs >>= 2;
3140                 }
3141                 WREG32(PA_SC_RASTER_CONFIG, data);
3142         }
3143         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3144 }
3145
3146 /**
3147  * cik_gpu_init - setup the 3D engine
3148  *
3149  * @rdev: radeon_device pointer
3150  *
3151  * Configures the 3D engine and tiling configuration
3152  * registers so that the 3D engine is usable.
3153  */
3154 static void cik_gpu_init(struct radeon_device *rdev)
3155 {
3156         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3157         u32 mc_shared_chmap, mc_arb_ramcfg;
3158         u32 hdp_host_path_cntl;
3159         u32 tmp;
3160         int i, j;
3161
3162         switch (rdev->family) {
3163         case CHIP_BONAIRE:
3164                 rdev->config.cik.max_shader_engines = 2;
3165                 rdev->config.cik.max_tile_pipes = 4;
3166                 rdev->config.cik.max_cu_per_sh = 7;
3167                 rdev->config.cik.max_sh_per_se = 1;
3168                 rdev->config.cik.max_backends_per_se = 2;
3169                 rdev->config.cik.max_texture_channel_caches = 4;
3170                 rdev->config.cik.max_gprs = 256;
3171                 rdev->config.cik.max_gs_threads = 32;
3172                 rdev->config.cik.max_hw_contexts = 8;
3173
3174                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3175                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3176                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3177                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3178                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3179                 break;
3180         case CHIP_HAWAII:
3181                 rdev->config.cik.max_shader_engines = 4;
3182                 rdev->config.cik.max_tile_pipes = 16;
3183                 rdev->config.cik.max_cu_per_sh = 11;
3184                 rdev->config.cik.max_sh_per_se = 1;
3185                 rdev->config.cik.max_backends_per_se = 4;
3186                 rdev->config.cik.max_texture_channel_caches = 16;
3187                 rdev->config.cik.max_gprs = 256;
3188                 rdev->config.cik.max_gs_threads = 32;
3189                 rdev->config.cik.max_hw_contexts = 8;
3190
3191                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3192                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3193                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3194                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3195                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3196                 break;
3197         case CHIP_KAVERI:
3198                 rdev->config.cik.max_shader_engines = 1;
3199                 rdev->config.cik.max_tile_pipes = 4;
3200                 if ((rdev->pdev->device == 0x1304) ||
3201                     (rdev->pdev->device == 0x1305) ||
3202                     (rdev->pdev->device == 0x130C) ||
3203                     (rdev->pdev->device == 0x130F) ||
3204                     (rdev->pdev->device == 0x1310) ||
3205                     (rdev->pdev->device == 0x1311) ||
3206                     (rdev->pdev->device == 0x131C)) {
3207                         rdev->config.cik.max_cu_per_sh = 8;
3208                         rdev->config.cik.max_backends_per_se = 2;
3209                 } else if ((rdev->pdev->device == 0x1309) ||
3210                            (rdev->pdev->device == 0x130A) ||
3211                            (rdev->pdev->device == 0x130D) ||
3212                            (rdev->pdev->device == 0x1313) ||
3213                            (rdev->pdev->device == 0x131D)) {
3214                         rdev->config.cik.max_cu_per_sh = 6;
3215                         rdev->config.cik.max_backends_per_se = 2;
3216                 } else if ((rdev->pdev->device == 0x1306) ||
3217                            (rdev->pdev->device == 0x1307) ||
3218                            (rdev->pdev->device == 0x130B) ||
3219                            (rdev->pdev->device == 0x130E) ||
3220                            (rdev->pdev->device == 0x1315) ||
3221                            (rdev->pdev->device == 0x131B)) {
3222                         rdev->config.cik.max_cu_per_sh = 4;
3223                         rdev->config.cik.max_backends_per_se = 1;
3224                 } else {
3225                         rdev->config.cik.max_cu_per_sh = 3;
3226                         rdev->config.cik.max_backends_per_se = 1;
3227                 }
3228                 rdev->config.cik.max_sh_per_se = 1;
3229                 rdev->config.cik.max_texture_channel_caches = 4;
3230                 rdev->config.cik.max_gprs = 256;
3231                 rdev->config.cik.max_gs_threads = 16;
3232                 rdev->config.cik.max_hw_contexts = 8;
3233
3234                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3235                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3236                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3237                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3238                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3239                 break;
3240         case CHIP_KABINI:
3241         default:
3242                 rdev->config.cik.max_shader_engines = 1;
3243                 rdev->config.cik.max_tile_pipes = 2;
3244                 rdev->config.cik.max_cu_per_sh = 2;
3245                 rdev->config.cik.max_sh_per_se = 1;
3246                 rdev->config.cik.max_backends_per_se = 1;
3247                 rdev->config.cik.max_texture_channel_caches = 2;
3248                 rdev->config.cik.max_gprs = 256;
3249                 rdev->config.cik.max_gs_threads = 16;
3250                 rdev->config.cik.max_hw_contexts = 8;
3251
3252                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3253                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3254                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3255                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3256                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3257                 break;
3258         }
3259
3260         /* Initialize HDP */
3261         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3262                 WREG32((0x2c14 + j), 0x00000000);
3263                 WREG32((0x2c18 + j), 0x00000000);
3264                 WREG32((0x2c1c + j), 0x00000000);
3265                 WREG32((0x2c20 + j), 0x00000000);
3266                 WREG32((0x2c24 + j), 0x00000000);
3267         }
3268
3269         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3270
3271         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3272
3273         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3274         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3275
3276         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3277         rdev->config.cik.mem_max_burst_length_bytes = 256;
3278         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3279         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3280         if (rdev->config.cik.mem_row_size_in_kb > 4)
3281                 rdev->config.cik.mem_row_size_in_kb = 4;
3282         /* XXX use MC settings? */
3283         rdev->config.cik.shader_engine_tile_size = 32;
3284         rdev->config.cik.num_gpus = 1;
3285         rdev->config.cik.multi_gpu_tile_size = 64;
3286
3287         /* fix up row size */
3288         gb_addr_config &= ~ROW_SIZE_MASK;
3289         switch (rdev->config.cik.mem_row_size_in_kb) {
3290         case 1:
3291         default:
3292                 gb_addr_config |= ROW_SIZE(0);
3293                 break;
3294         case 2:
3295                 gb_addr_config |= ROW_SIZE(1);
3296                 break;
3297         case 4:
3298                 gb_addr_config |= ROW_SIZE(2);
3299                 break;
3300         }
3301
3302         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3303          * not have bank info, so create a custom tiling dword.
3304          * bits 3:0   num_pipes
3305          * bits 7:4   num_banks
3306          * bits 11:8  group_size
3307          * bits 15:12 row_size
3308          */
3309         rdev->config.cik.tile_config = 0;
3310         switch (rdev->config.cik.num_tile_pipes) {
3311         case 1:
3312                 rdev->config.cik.tile_config |= (0 << 0);
3313                 break;
3314         case 2:
3315                 rdev->config.cik.tile_config |= (1 << 0);
3316                 break;
3317         case 4:
3318                 rdev->config.cik.tile_config |= (2 << 0);
3319                 break;
3320         case 8:
3321         default:
3322                 /* XXX what about 12? */
3323                 rdev->config.cik.tile_config |= (3 << 0);
3324                 break;
3325         }
3326         rdev->config.cik.tile_config |=
3327                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3328         rdev->config.cik.tile_config |=
3329                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3330         rdev->config.cik.tile_config |=
3331                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3332
3333         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3334         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3335         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3336         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3337         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3338         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3339         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3340         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3341
3342         cik_tiling_mode_table_init(rdev);
3343
3344         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3345                      rdev->config.cik.max_sh_per_se,
3346                      rdev->config.cik.max_backends_per_se);
3347
3348         /* set HW defaults for 3D engine */
3349         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3350
3351         WREG32(SX_DEBUG_1, 0x20);
3352
3353         WREG32(TA_CNTL_AUX, 0x00010000);
3354
3355         tmp = RREG32(SPI_CONFIG_CNTL);
3356         tmp |= 0x03000000;
3357         WREG32(SPI_CONFIG_CNTL, tmp);
3358
3359         WREG32(SQ_CONFIG, 1);
3360
3361         WREG32(DB_DEBUG, 0);
3362
3363         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3364         tmp |= 0x00000400;
3365         WREG32(DB_DEBUG2, tmp);
3366
3367         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3368         tmp |= 0x00020200;
3369         WREG32(DB_DEBUG3, tmp);
3370
3371         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3372         tmp |= 0x00018208;
3373         WREG32(CB_HW_CONTROL, tmp);
3374
3375         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3376
3377         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3378                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3379                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3380                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3381
3382         WREG32(VGT_NUM_INSTANCES, 1);
3383
3384         WREG32(CP_PERFMON_CNTL, 0);
3385
3386         WREG32(SQ_CONFIG, 0);
3387
3388         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3389                                           FORCE_EOV_MAX_REZ_CNT(255)));
3390
3391         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3392                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3393
3394         WREG32(VGT_GS_VERTEX_REUSE, 16);
3395         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3396
3397         tmp = RREG32(HDP_MISC_CNTL);
3398         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3399         WREG32(HDP_MISC_CNTL, tmp);
3400
3401         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3402         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3403
3404         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3405         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3406
3407         udelay(50);
3408 }
3409
3410 /*
3411  * GPU scratch register helper functions.
3412  */
3413 /**
3414  * cik_scratch_init - setup driver info for CP scratch regs
3415  *
3416  * @rdev: radeon_device pointer
3417  *
3418  * Set up the number and offset of the CP scratch registers.
3419  * NOTE: use of CP scratch registers is a legacy inferface and
3420  * is not used by default on newer asics (r6xx+).  On newer asics,
3421  * memory buffers are used for fences rather than scratch regs.
3422  */
3423 static void cik_scratch_init(struct radeon_device *rdev)
3424 {
3425         int i;
3426
3427         rdev->scratch.num_reg = 7;
3428         rdev->scratch.reg_base = SCRATCH_REG0;
3429         for (i = 0; i < rdev->scratch.num_reg; i++) {
3430                 rdev->scratch.free[i] = true;
3431                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3432         }
3433 }
3434
3435 /**
3436  * cik_ring_test - basic gfx ring test
3437  *
3438  * @rdev: radeon_device pointer
3439  * @ring: radeon_ring structure holding ring information
3440  *
3441  * Allocate a scratch register and write to it using the gfx ring (CIK).
3442  * Provides a basic gfx ring test to verify that the ring is working.
3443  * Used by cik_cp_gfx_resume();
3444  * Returns 0 on success, error on failure.
3445  */
3446 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3447 {
3448         uint32_t scratch;
3449         uint32_t tmp = 0;
3450         unsigned i;
3451         int r;
3452
3453         r = radeon_scratch_get(rdev, &scratch);
3454         if (r) {
3455                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3456                 return r;
3457         }
3458         WREG32(scratch, 0xCAFEDEAD);
3459         r = radeon_ring_lock(rdev, ring, 3);
3460         if (r) {
3461                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3462                 radeon_scratch_free(rdev, scratch);
3463                 return r;
3464         }
3465         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3466         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3467         radeon_ring_write(ring, 0xDEADBEEF);
3468         radeon_ring_unlock_commit(rdev, ring);
3469
3470         for (i = 0; i < rdev->usec_timeout; i++) {
3471                 tmp = RREG32(scratch);
3472                 if (tmp == 0xDEADBEEF)
3473                         break;
3474                 DRM_UDELAY(1);
3475         }
3476         if (i < rdev->usec_timeout) {
3477                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3478         } else {
3479                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3480                           ring->idx, scratch, tmp);
3481                 r = -EINVAL;
3482         }
3483         radeon_scratch_free(rdev, scratch);
3484         return r;
3485 }
3486
3487 /**
3488  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3489  *
3490  * @rdev: radeon_device pointer
3491  * @fence: radeon fence object
3492  *
3493  * Emits a fence sequnce number on the gfx ring and flushes
3494  * GPU caches.
3495  */
3496 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3497                              struct radeon_fence *fence)
3498 {
3499         struct radeon_ring *ring = &rdev->ring[fence->ring];
3500         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3501
3502         /* EVENT_WRITE_EOP - flush caches, send int */
3503         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3504         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3505                                  EOP_TC_ACTION_EN |
3506                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3507                                  EVENT_INDEX(5)));
3508         radeon_ring_write(ring, addr & 0xfffffffc);
3509         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3510         radeon_ring_write(ring, fence->seq);
3511         radeon_ring_write(ring, 0);
3512         /* HDP flush */
3513         /* We should be using the new WAIT_REG_MEM special op packet here
3514          * but it causes the CP to hang
3515          */
3516         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3517         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3518                                  WRITE_DATA_DST_SEL(0)));
3519         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3520         radeon_ring_write(ring, 0);
3521         radeon_ring_write(ring, 0);
3522 }
3523
3524 /**
3525  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3526  *
3527  * @rdev: radeon_device pointer
3528  * @fence: radeon fence object
3529  *
3530  * Emits a fence sequnce number on the compute ring and flushes
3531  * GPU caches.
3532  */
3533 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3534                                  struct radeon_fence *fence)
3535 {
3536         struct radeon_ring *ring = &rdev->ring[fence->ring];
3537         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3538
3539         /* RELEASE_MEM - flush caches, send int */
3540         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3541         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3542                                  EOP_TC_ACTION_EN |
3543                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3544                                  EVENT_INDEX(5)));
3545         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3546         radeon_ring_write(ring, addr & 0xfffffffc);
3547         radeon_ring_write(ring, upper_32_bits(addr));
3548         radeon_ring_write(ring, fence->seq);
3549         radeon_ring_write(ring, 0);
3550         /* HDP flush */
3551         /* We should be using the new WAIT_REG_MEM special op packet here
3552          * but it causes the CP to hang
3553          */
3554         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3555         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3556                                  WRITE_DATA_DST_SEL(0)));
3557         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3558         radeon_ring_write(ring, 0);
3559         radeon_ring_write(ring, 0);
3560 }
3561
3562 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3563                              struct radeon_ring *ring,
3564                              struct radeon_semaphore *semaphore,
3565                              bool emit_wait)
3566 {
3567 /* TODO: figure out why semaphore cause lockups */
3568 #if 0
3569         uint64_t addr = semaphore->gpu_addr;
3570         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3571
3572         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3573         radeon_ring_write(ring, addr & 0xffffffff);
3574         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3575
3576         return true;
3577 #else
3578         return false;
3579 #endif
3580 }
3581
3582 /**
3583  * cik_copy_cpdma - copy pages using the CP DMA engine
3584  *
3585  * @rdev: radeon_device pointer
3586  * @src_offset: src GPU address
3587  * @dst_offset: dst GPU address
3588  * @num_gpu_pages: number of GPU pages to xfer
3589  * @fence: radeon fence object
3590  *
3591  * Copy GPU paging using the CP DMA engine (CIK+).
3592  * Used by the radeon ttm implementation to move pages if
3593  * registered as the asic copy callback.
3594  */
3595 int cik_copy_cpdma(struct radeon_device *rdev,
3596                    uint64_t src_offset, uint64_t dst_offset,
3597                    unsigned num_gpu_pages,
3598                    struct radeon_fence **fence)
3599 {
3600         struct radeon_semaphore *sem = NULL;
3601         int ring_index = rdev->asic->copy.blit_ring_index;
3602         struct radeon_ring *ring = &rdev->ring[ring_index];
3603         u32 size_in_bytes, cur_size_in_bytes, control;
3604         int i, num_loops;
3605         int r = 0;
3606
3607         r = radeon_semaphore_create(rdev, &sem);
3608         if (r) {
3609                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3610                 return r;
3611         }
3612
3613         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3614         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3615         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3616         if (r) {
3617                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3618                 radeon_semaphore_free(rdev, &sem, NULL);
3619                 return r;
3620         }
3621
3622         radeon_semaphore_sync_to(sem, *fence);
3623         radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3624
3625         for (i = 0; i < num_loops; i++) {
3626                 cur_size_in_bytes = size_in_bytes;
3627                 if (cur_size_in_bytes > 0x1fffff)
3628                         cur_size_in_bytes = 0x1fffff;
3629                 size_in_bytes -= cur_size_in_bytes;
3630                 control = 0;
3631                 if (size_in_bytes == 0)
3632                         control |= PACKET3_DMA_DATA_CP_SYNC;
3633                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3634                 radeon_ring_write(ring, control);
3635                 radeon_ring_write(ring, lower_32_bits(src_offset));
3636                 radeon_ring_write(ring, upper_32_bits(src_offset));
3637                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3638                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3639                 radeon_ring_write(ring, cur_size_in_bytes);
3640                 src_offset += cur_size_in_bytes;
3641                 dst_offset += cur_size_in_bytes;
3642         }
3643
3644         r = radeon_fence_emit(rdev, fence, ring->idx);
3645         if (r) {
3646                 radeon_ring_unlock_undo(rdev, ring);
3647                 return r;
3648         }
3649
3650         radeon_ring_unlock_commit(rdev, ring);
3651         radeon_semaphore_free(rdev, &sem, *fence);
3652
3653         return r;
3654 }
3655
3656 /*
3657  * IB stuff
3658  */
3659 /**
3660  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3661  *
3662  * @rdev: radeon_device pointer
3663  * @ib: radeon indirect buffer object
3664  *
3665  * Emits an DE (drawing engine) or CE (constant engine) IB
3666  * on the gfx ring.  IBs are usually generated by userspace
3667  * acceleration drivers and submitted to the kernel for
3668  * sheduling on the ring.  This function schedules the IB
3669  * on the gfx ring for execution by the GPU.
3670  */
3671 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3672 {
3673         struct radeon_ring *ring = &rdev->ring[ib->ring];
3674         u32 header, control = INDIRECT_BUFFER_VALID;
3675
3676         if (ib->is_const_ib) {
3677                 /* set switch buffer packet before const IB */
3678                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3679                 radeon_ring_write(ring, 0);
3680
3681                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3682         } else {
3683                 u32 next_rptr;
3684                 if (ring->rptr_save_reg) {
3685                         next_rptr = ring->wptr + 3 + 4;
3686                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3687                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3688                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3689                         radeon_ring_write(ring, next_rptr);
3690                 } else if (rdev->wb.enabled) {
3691                         next_rptr = ring->wptr + 5 + 4;
3692                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3693                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3694                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3695                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3696                         radeon_ring_write(ring, next_rptr);
3697                 }
3698
3699                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3700         }
3701
3702         control |= ib->length_dw |
3703                 (ib->vm ? (ib->vm->id << 24) : 0);
3704
3705         radeon_ring_write(ring, header);
3706         radeon_ring_write(ring,
3707 #ifdef __BIG_ENDIAN
3708                           (2 << 0) |
3709 #endif
3710                           (ib->gpu_addr & 0xFFFFFFFC));
3711         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3712         radeon_ring_write(ring, control);
3713 }
3714
3715 /**
3716  * cik_ib_test - basic gfx ring IB test
3717  *
3718  * @rdev: radeon_device pointer
3719  * @ring: radeon_ring structure holding ring information
3720  *
3721  * Allocate an IB and execute it on the gfx ring (CIK).
3722  * Provides a basic gfx ring test to verify that IBs are working.
3723  * Returns 0 on success, error on failure.
3724  */
3725 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3726 {
3727         struct radeon_ib ib;
3728         uint32_t scratch;
3729         uint32_t tmp = 0;
3730         unsigned i;
3731         int r;
3732
3733         r = radeon_scratch_get(rdev, &scratch);
3734         if (r) {
3735                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3736                 return r;
3737         }
3738         WREG32(scratch, 0xCAFEDEAD);
3739         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3740         if (r) {
3741                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3742                 radeon_scratch_free(rdev, scratch);
3743                 return r;
3744         }
3745         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3746         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3747         ib.ptr[2] = 0xDEADBEEF;
3748         ib.length_dw = 3;
3749         r = radeon_ib_schedule(rdev, &ib, NULL);
3750         if (r) {
3751                 radeon_scratch_free(rdev, scratch);
3752                 radeon_ib_free(rdev, &ib);
3753                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3754                 return r;
3755         }
3756         r = radeon_fence_wait(ib.fence, false);
3757         if (r) {
3758                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3759                 radeon_scratch_free(rdev, scratch);
3760                 radeon_ib_free(rdev, &ib);
3761                 return r;
3762         }
3763         for (i = 0; i < rdev->usec_timeout; i++) {
3764                 tmp = RREG32(scratch);
3765                 if (tmp == 0xDEADBEEF)
3766                         break;
3767                 DRM_UDELAY(1);
3768         }
3769         if (i < rdev->usec_timeout) {
3770                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3771         } else {
3772                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3773                           scratch, tmp);
3774                 r = -EINVAL;
3775         }
3776         radeon_scratch_free(rdev, scratch);
3777         radeon_ib_free(rdev, &ib);
3778         return r;
3779 }
3780
3781 /*
3782  * CP.
3783  * On CIK, gfx and compute now have independent command processors.
3784  *
3785  * GFX
3786  * Gfx consists of a single ring and can process both gfx jobs and
3787  * compute jobs.  The gfx CP consists of three microengines (ME):
3788  * PFP - Pre-Fetch Parser
3789  * ME - Micro Engine
3790  * CE - Constant Engine
3791  * The PFP and ME make up what is considered the Drawing Engine (DE).
3792  * The CE is an asynchronous engine used for updating buffer descriptors
3793  * used by the DE so that they can be loaded into cache in parallel
3794  * while the DE is processing state update packets.
3795  *
3796  * Compute
3797  * The compute CP consists of two microengines (ME):
3798  * MEC1 - Compute MicroEngine 1
3799  * MEC2 - Compute MicroEngine 2
3800  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3801  * The queues are exposed to userspace and are programmed directly
3802  * by the compute runtime.
3803  */
3804 /**
3805  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3806  *
3807  * @rdev: radeon_device pointer
3808  * @enable: enable or disable the MEs
3809  *
3810  * Halts or unhalts the gfx MEs.
3811  */
3812 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3813 {
3814         if (enable)
3815                 WREG32(CP_ME_CNTL, 0);
3816         else {
3817                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3818                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3819         }
3820         udelay(50);
3821 }
3822
3823 /**
3824  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3825  *
3826  * @rdev: radeon_device pointer
3827  *
3828  * Loads the gfx PFP, ME, and CE ucode.
3829  * Returns 0 for success, -EINVAL if the ucode is not available.
3830  */
3831 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3832 {
3833         const __be32 *fw_data;
3834         int i;
3835
3836         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3837                 return -EINVAL;
3838
3839         cik_cp_gfx_enable(rdev, false);
3840
3841         /* PFP */
3842         fw_data = (const __be32 *)rdev->pfp_fw->data;
3843         WREG32(CP_PFP_UCODE_ADDR, 0);
3844         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3845                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3846         WREG32(CP_PFP_UCODE_ADDR, 0);
3847
3848         /* CE */
3849         fw_data = (const __be32 *)rdev->ce_fw->data;
3850         WREG32(CP_CE_UCODE_ADDR, 0);
3851         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3852                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3853         WREG32(CP_CE_UCODE_ADDR, 0);
3854
3855         /* ME */
3856         fw_data = (const __be32 *)rdev->me_fw->data;
3857         WREG32(CP_ME_RAM_WADDR, 0);
3858         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3859                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3860         WREG32(CP_ME_RAM_WADDR, 0);
3861
3862         WREG32(CP_PFP_UCODE_ADDR, 0);
3863         WREG32(CP_CE_UCODE_ADDR, 0);
3864         WREG32(CP_ME_RAM_WADDR, 0);
3865         WREG32(CP_ME_RAM_RADDR, 0);
3866         return 0;
3867 }
3868
3869 /**
3870  * cik_cp_gfx_start - start the gfx ring
3871  *
3872  * @rdev: radeon_device pointer
3873  *
3874  * Enables the ring and loads the clear state context and other
3875  * packets required to init the ring.
3876  * Returns 0 for success, error for failure.
3877  */
3878 static int cik_cp_gfx_start(struct radeon_device *rdev)
3879 {
3880         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3881         int r, i;
3882
3883         /* init the CP */
3884         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3885         WREG32(CP_ENDIAN_SWAP, 0);
3886         WREG32(CP_DEVICE_ID, 1);
3887
3888         cik_cp_gfx_enable(rdev, true);
3889
3890         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3891         if (r) {
3892                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3893                 return r;
3894         }
3895
3896         /* init the CE partitions.  CE only used for gfx on CIK */
3897         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3898         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3899         radeon_ring_write(ring, 0xc000);
3900         radeon_ring_write(ring, 0xc000);
3901
3902         /* setup clear context state */
3903         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3904         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3905
3906         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3907         radeon_ring_write(ring, 0x80000000);
3908         radeon_ring_write(ring, 0x80000000);
3909
3910         for (i = 0; i < cik_default_size; i++)
3911                 radeon_ring_write(ring, cik_default_state[i]);
3912
3913         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3914         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3915
3916         /* set clear context state */
3917         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3918         radeon_ring_write(ring, 0);
3919
3920         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3921         radeon_ring_write(ring, 0x00000316);
3922         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3923         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3924
3925         radeon_ring_unlock_commit(rdev, ring);
3926
3927         return 0;
3928 }
3929
3930 /**
3931  * cik_cp_gfx_fini - stop the gfx ring
3932  *
3933  * @rdev: radeon_device pointer
3934  *
3935  * Stop the gfx ring and tear down the driver ring
3936  * info.
3937  */
3938 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3939 {
3940         cik_cp_gfx_enable(rdev, false);
3941         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3942 }
3943
3944 /**
3945  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3946  *
3947  * @rdev: radeon_device pointer
3948  *
3949  * Program the location and size of the gfx ring buffer
3950  * and test it to make sure it's working.
3951  * Returns 0 for success, error for failure.
3952  */
3953 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3954 {
3955         struct radeon_ring *ring;
3956         u32 tmp;
3957         u32 rb_bufsz;
3958         u64 rb_addr;
3959         int r;
3960
3961         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3962         if (rdev->family != CHIP_HAWAII)
3963                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3964
3965         /* Set the write pointer delay */
3966         WREG32(CP_RB_WPTR_DELAY, 0);
3967
3968         /* set the RB to use vmid 0 */
3969         WREG32(CP_RB_VMID, 0);
3970
3971         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3972
3973         /* ring 0 - compute and gfx */
3974         /* Set ring buffer size */
3975         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3976         rb_bufsz = order_base_2(ring->ring_size / 8);
3977         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3978 #ifdef __BIG_ENDIAN
3979         tmp |= BUF_SWAP_32BIT;
3980 #endif
3981         WREG32(CP_RB0_CNTL, tmp);
3982
3983         /* Initialize the ring buffer's read and write pointers */
3984         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3985         ring->wptr = 0;
3986         WREG32(CP_RB0_WPTR, ring->wptr);
3987
3988         /* set the wb address wether it's enabled or not */
3989         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3990         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3991
3992         /* scratch register shadowing is no longer supported */
3993         WREG32(SCRATCH_UMSK, 0);
3994
3995         if (!rdev->wb.enabled)
3996                 tmp |= RB_NO_UPDATE;
3997
3998         mdelay(1);
3999         WREG32(CP_RB0_CNTL, tmp);
4000
4001         rb_addr = ring->gpu_addr >> 8;
4002         WREG32(CP_RB0_BASE, rb_addr);
4003         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4004
4005         ring->rptr = RREG32(CP_RB0_RPTR);
4006
4007         /* start the ring */
4008         cik_cp_gfx_start(rdev);
4009         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4010         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4011         if (r) {
4012                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4013                 return r;
4014         }
4015         return 0;
4016 }
4017
4018 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
4019                               struct radeon_ring *ring)
4020 {
4021         u32 rptr;
4022
4023
4024
4025         if (rdev->wb.enabled) {
4026                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
4027         } else {
4028                 mutex_lock(&rdev->srbm_mutex);
4029                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4030                 rptr = RREG32(CP_HQD_PQ_RPTR);
4031                 cik_srbm_select(rdev, 0, 0, 0, 0);
4032                 mutex_unlock(&rdev->srbm_mutex);
4033         }
4034
4035         return rptr;
4036 }
4037
4038 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
4039                               struct radeon_ring *ring)
4040 {
4041         u32 wptr;
4042
4043         if (rdev->wb.enabled) {
4044                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
4045         } else {
4046                 mutex_lock(&rdev->srbm_mutex);
4047                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4048                 wptr = RREG32(CP_HQD_PQ_WPTR);
4049                 cik_srbm_select(rdev, 0, 0, 0, 0);
4050                 mutex_unlock(&rdev->srbm_mutex);
4051         }
4052
4053         return wptr;
4054 }
4055
4056 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
4057                                struct radeon_ring *ring)
4058 {
4059         rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
4060         WDOORBELL32(ring->doorbell_index, ring->wptr);
4061 }
4062
4063 /**
4064  * cik_cp_compute_enable - enable/disable the compute CP MEs
4065  *
4066  * @rdev: radeon_device pointer
4067  * @enable: enable or disable the MEs
4068  *
4069  * Halts or unhalts the compute MEs.
4070  */
4071 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4072 {
4073         if (enable)
4074                 WREG32(CP_MEC_CNTL, 0);
4075         else
4076                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4077         udelay(50);
4078 }
4079
4080 /**
4081  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4082  *
4083  * @rdev: radeon_device pointer
4084  *
4085  * Loads the compute MEC1&2 ucode.
4086  * Returns 0 for success, -EINVAL if the ucode is not available.
4087  */
4088 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4089 {
4090         const __be32 *fw_data;
4091         int i;
4092
4093         if (!rdev->mec_fw)
4094                 return -EINVAL;
4095
4096         cik_cp_compute_enable(rdev, false);
4097
4098         /* MEC1 */
4099         fw_data = (const __be32 *)rdev->mec_fw->data;
4100         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4101         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4102                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4103         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4104
4105         if (rdev->family == CHIP_KAVERI) {
4106                 /* MEC2 */
4107                 fw_data = (const __be32 *)rdev->mec_fw->data;
4108                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4109                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4110                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4111                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4112         }
4113
4114         return 0;
4115 }
4116
4117 /**
4118  * cik_cp_compute_start - start the compute queues
4119  *
4120  * @rdev: radeon_device pointer
4121  *
4122  * Enable the compute queues.
4123  * Returns 0 for success, error for failure.
4124  */
4125 static int cik_cp_compute_start(struct radeon_device *rdev)
4126 {
4127         cik_cp_compute_enable(rdev, true);
4128
4129         return 0;
4130 }
4131
4132 /**
4133  * cik_cp_compute_fini - stop the compute queues
4134  *
4135  * @rdev: radeon_device pointer
4136  *
4137  * Stop the compute queues and tear down the driver queue
4138  * info.
4139  */
4140 static void cik_cp_compute_fini(struct radeon_device *rdev)
4141 {
4142         int i, idx, r;
4143
4144         cik_cp_compute_enable(rdev, false);
4145
4146         for (i = 0; i < 2; i++) {
4147                 if (i == 0)
4148                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4149                 else
4150                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4151
4152                 if (rdev->ring[idx].mqd_obj) {
4153                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4154                         if (unlikely(r != 0))
4155                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4156
4157                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4158                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4159
4160                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4161                         rdev->ring[idx].mqd_obj = NULL;
4162                 }
4163         }
4164 }
4165
4166 static void cik_mec_fini(struct radeon_device *rdev)
4167 {
4168         int r;
4169
4170         if (rdev->mec.hpd_eop_obj) {
4171                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4172                 if (unlikely(r != 0))
4173                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4174                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4175                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4176
4177                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4178                 rdev->mec.hpd_eop_obj = NULL;
4179         }
4180 }
4181
4182 #define MEC_HPD_SIZE 2048
4183
4184 static int cik_mec_init(struct radeon_device *rdev)
4185 {
4186         int r;
4187         u32 *hpd;
4188
4189         /*
4190          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4191          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4192          */
4193         if (rdev->family == CHIP_KAVERI)
4194                 rdev->mec.num_mec = 2;
4195         else
4196                 rdev->mec.num_mec = 1;
4197         rdev->mec.num_pipe = 4;
4198         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4199
4200         if (rdev->mec.hpd_eop_obj == NULL) {
4201                 r = radeon_bo_create(rdev,
4202                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4203                                      PAGE_SIZE, true,
4204                                      RADEON_GEM_DOMAIN_GTT, NULL,
4205                                      &rdev->mec.hpd_eop_obj);
4206                 if (r) {
4207                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4208                         return r;
4209                 }
4210         }
4211
4212         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4213         if (unlikely(r != 0)) {
4214                 cik_mec_fini(rdev);
4215                 return r;
4216         }
4217         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4218                           &rdev->mec.hpd_eop_gpu_addr);
4219         if (r) {
4220                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4221                 cik_mec_fini(rdev);
4222                 return r;
4223         }
4224         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4225         if (r) {
4226                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4227                 cik_mec_fini(rdev);
4228                 return r;
4229         }
4230
4231         /* clear memory.  Not sure if this is required or not */
4232         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4233
4234         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4235         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4236
4237         return 0;
4238 }
4239
4240 struct hqd_registers
4241 {
4242         u32 cp_mqd_base_addr;
4243         u32 cp_mqd_base_addr_hi;
4244         u32 cp_hqd_active;
4245         u32 cp_hqd_vmid;
4246         u32 cp_hqd_persistent_state;
4247         u32 cp_hqd_pipe_priority;
4248         u32 cp_hqd_queue_priority;
4249         u32 cp_hqd_quantum;
4250         u32 cp_hqd_pq_base;
4251         u32 cp_hqd_pq_base_hi;
4252         u32 cp_hqd_pq_rptr;
4253         u32 cp_hqd_pq_rptr_report_addr;
4254         u32 cp_hqd_pq_rptr_report_addr_hi;
4255         u32 cp_hqd_pq_wptr_poll_addr;
4256         u32 cp_hqd_pq_wptr_poll_addr_hi;
4257         u32 cp_hqd_pq_doorbell_control;
4258         u32 cp_hqd_pq_wptr;
4259         u32 cp_hqd_pq_control;
4260         u32 cp_hqd_ib_base_addr;
4261         u32 cp_hqd_ib_base_addr_hi;
4262         u32 cp_hqd_ib_rptr;
4263         u32 cp_hqd_ib_control;
4264         u32 cp_hqd_iq_timer;
4265         u32 cp_hqd_iq_rptr;
4266         u32 cp_hqd_dequeue_request;
4267         u32 cp_hqd_dma_offload;
4268         u32 cp_hqd_sema_cmd;
4269         u32 cp_hqd_msg_type;
4270         u32 cp_hqd_atomic0_preop_lo;
4271         u32 cp_hqd_atomic0_preop_hi;
4272         u32 cp_hqd_atomic1_preop_lo;
4273         u32 cp_hqd_atomic1_preop_hi;
4274         u32 cp_hqd_hq_scheduler0;
4275         u32 cp_hqd_hq_scheduler1;
4276         u32 cp_mqd_control;
4277 };
4278
4279 struct bonaire_mqd
4280 {
4281         u32 header;
4282         u32 dispatch_initiator;
4283         u32 dimensions[3];
4284         u32 start_idx[3];
4285         u32 num_threads[3];
4286         u32 pipeline_stat_enable;
4287         u32 perf_counter_enable;
4288         u32 pgm[2];
4289         u32 tba[2];
4290         u32 tma[2];
4291         u32 pgm_rsrc[2];
4292         u32 vmid;
4293         u32 resource_limits;
4294         u32 static_thread_mgmt01[2];
4295         u32 tmp_ring_size;
4296         u32 static_thread_mgmt23[2];
4297         u32 restart[3];
4298         u32 thread_trace_enable;
4299         u32 reserved1;
4300         u32 user_data[16];
4301         u32 vgtcs_invoke_count[2];
4302         struct hqd_registers queue_state;
4303         u32 dequeue_cntr;
4304         u32 interrupt_queue[64];
4305 };
4306
4307 /**
4308  * cik_cp_compute_resume - setup the compute queue registers
4309  *
4310  * @rdev: radeon_device pointer
4311  *
4312  * Program the compute queues and test them to make sure they
4313  * are working.
4314  * Returns 0 for success, error for failure.
4315  */
4316 static int cik_cp_compute_resume(struct radeon_device *rdev)
4317 {
4318         int r, i, idx;
4319         u32 tmp;
4320         bool use_doorbell = true;
4321         u64 hqd_gpu_addr;
4322         u64 mqd_gpu_addr;
4323         u64 eop_gpu_addr;
4324         u64 wb_gpu_addr;
4325         u32 *buf;
4326         struct bonaire_mqd *mqd;
4327
4328         r = cik_cp_compute_start(rdev);
4329         if (r)
4330                 return r;
4331
4332         /* fix up chicken bits */
4333         tmp = RREG32(CP_CPF_DEBUG);
4334         tmp |= (1 << 23);
4335         WREG32(CP_CPF_DEBUG, tmp);
4336
4337         /* init the pipes */
4338         mutex_lock(&rdev->srbm_mutex);
4339         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4340                 int me = (i < 4) ? 1 : 2;
4341                 int pipe = (i < 4) ? i : (i - 4);
4342
4343                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4344
4345                 cik_srbm_select(rdev, me, pipe, 0, 0);
4346
4347                 /* write the EOP addr */
4348                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4349                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4350
4351                 /* set the VMID assigned */
4352                 WREG32(CP_HPD_EOP_VMID, 0);
4353
4354                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4355                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4356                 tmp &= ~EOP_SIZE_MASK;
4357                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4358                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4359         }
4360         cik_srbm_select(rdev, 0, 0, 0, 0);
4361         mutex_unlock(&rdev->srbm_mutex);
4362
4363         /* init the queues.  Just two for now. */
4364         for (i = 0; i < 2; i++) {
4365                 if (i == 0)
4366                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4367                 else
4368                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4369
4370                 if (rdev->ring[idx].mqd_obj == NULL) {
4371                         r = radeon_bo_create(rdev,
4372                                              sizeof(struct bonaire_mqd),
4373                                              PAGE_SIZE, true,
4374                                              RADEON_GEM_DOMAIN_GTT, NULL,
4375                                              &rdev->ring[idx].mqd_obj);
4376                         if (r) {
4377                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4378                                 return r;
4379                         }
4380                 }
4381
4382                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4383                 if (unlikely(r != 0)) {
4384                         cik_cp_compute_fini(rdev);
4385                         return r;
4386                 }
4387                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4388                                   &mqd_gpu_addr);
4389                 if (r) {
4390                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4391                         cik_cp_compute_fini(rdev);
4392                         return r;
4393                 }
4394                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4395                 if (r) {
4396                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4397                         cik_cp_compute_fini(rdev);
4398                         return r;
4399                 }
4400
4401                 /* init the mqd struct */
4402                 memset(buf, 0, sizeof(struct bonaire_mqd));
4403
4404                 mqd = (struct bonaire_mqd *)buf;
4405                 mqd->header = 0xC0310800;
4406                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4407                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4408                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4409                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4410
4411                 mutex_lock(&rdev->srbm_mutex);
4412                 cik_srbm_select(rdev, rdev->ring[idx].me,
4413                                 rdev->ring[idx].pipe,
4414                                 rdev->ring[idx].queue, 0);
4415
4416                 /* disable wptr polling */
4417                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4418                 tmp &= ~WPTR_POLL_EN;
4419                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4420
4421                 /* enable doorbell? */
4422                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4423                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4424                 if (use_doorbell)
4425                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4426                 else
4427                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4428                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4429                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4430
4431                 /* disable the queue if it's active */
4432                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4433                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4434                 mqd->queue_state.cp_hqd_pq_wptr= 0;
4435                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4436                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4437                         for (i = 0; i < rdev->usec_timeout; i++) {
4438                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4439                                         break;
4440                                 udelay(1);
4441                         }
4442                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4443                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4444                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4445                 }
4446
4447                 /* set the pointer to the MQD */
4448                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4449                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4450                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4451                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4452                 /* set MQD vmid to 0 */
4453                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4454                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4455                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4456
4457                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4458                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4459                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4460                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4461                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4462                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4463
4464                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4465                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4466                 mqd->queue_state.cp_hqd_pq_control &=
4467                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4468
4469                 mqd->queue_state.cp_hqd_pq_control |=
4470                         order_base_2(rdev->ring[idx].ring_size / 8);
4471                 mqd->queue_state.cp_hqd_pq_control |=
4472                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4473 #ifdef __BIG_ENDIAN
4474                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4475 #endif
4476                 mqd->queue_state.cp_hqd_pq_control &=
4477                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4478                 mqd->queue_state.cp_hqd_pq_control |=
4479                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4480                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4481
4482                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4483                 if (i == 0)
4484                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4485                 else
4486                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4487                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4488                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4489                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4490                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4491                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4492
4493                 /* set the wb address wether it's enabled or not */
4494                 if (i == 0)
4495                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4496                 else
4497                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4498                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4499                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4500                         upper_32_bits(wb_gpu_addr) & 0xffff;
4501                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4502                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4503                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4504                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4505
4506                 /* enable the doorbell if requested */
4507                 if (use_doorbell) {
4508                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4509                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4510                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4511                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4512                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4513                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4514                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4515                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4516
4517                 } else {
4518                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4519                 }
4520                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4521                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4522
4523                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4524                 rdev->ring[idx].wptr = 0;
4525                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4526                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4527                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4528                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4529
4530                 /* set the vmid for the queue */
4531                 mqd->queue_state.cp_hqd_vmid = 0;
4532                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4533
4534                 /* activate the queue */
4535                 mqd->queue_state.cp_hqd_active = 1;
4536                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4537
4538                 cik_srbm_select(rdev, 0, 0, 0, 0);
4539                 mutex_unlock(&rdev->srbm_mutex);
4540
4541                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4542                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4543
4544                 rdev->ring[idx].ready = true;
4545                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4546                 if (r)
4547                         rdev->ring[idx].ready = false;
4548         }
4549
4550         return 0;
4551 }
4552
4553 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4554 {
4555         cik_cp_gfx_enable(rdev, enable);
4556         cik_cp_compute_enable(rdev, enable);
4557 }
4558
/*
 * Load the gfx CP microcode and then the compute CP microcode.
 * Stops at the first failure and returns its error code; returns 0
 * when both loads succeed.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
        int err = cik_cp_gfx_load_microcode(rdev);

        if (err)
                return err;

        return cik_cp_compute_load_microcode(rdev);
}
4572
/* Tear down both command processors: the gfx ring first, then compute. */
static void cik_cp_fini(struct radeon_device *rdev)
{
        cik_cp_gfx_fini(rdev);

        cik_cp_compute_fini(rdev);
}
4578
4579 static int cik_cp_resume(struct radeon_device *rdev)
4580 {
4581         int r;
4582
4583         cik_enable_gui_idle_interrupt(rdev, false);
4584
4585         r = cik_cp_load_microcode(rdev);
4586         if (r)
4587                 return r;
4588
4589         r = cik_cp_gfx_resume(rdev);
4590         if (r)
4591                 return r;
4592         r = cik_cp_compute_resume(rdev);
4593         if (r)
4594                 return r;
4595
4596         cik_enable_gui_idle_interrupt(rdev, true);
4597
4598         return 0;
4599 }
4600
4601 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4602 {
4603         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4604                 RREG32(GRBM_STATUS));
4605         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4606                 RREG32(GRBM_STATUS2));
4607         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4608                 RREG32(GRBM_STATUS_SE0));
4609         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4610                 RREG32(GRBM_STATUS_SE1));
4611         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4612                 RREG32(GRBM_STATUS_SE2));
4613         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4614                 RREG32(GRBM_STATUS_SE3));
4615         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4616                 RREG32(SRBM_STATUS));
4617         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4618                 RREG32(SRBM_STATUS2));
4619         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4620                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4621         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4622                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4623         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4624         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4625                  RREG32(CP_STALLED_STAT1));
4626         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4627                  RREG32(CP_STALLED_STAT2));
4628         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4629                  RREG32(CP_STALLED_STAT3));
4630         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4631                  RREG32(CP_CPF_BUSY_STAT));
4632         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4633                  RREG32(CP_CPF_STALLED_STAT1));
4634         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4635         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4636         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4637                  RREG32(CP_CPC_STALLED_STAT1));
4638         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4639 }
4640
4641 /**
4642  * cik_gpu_check_soft_reset - check which blocks are busy
4643  *
4644  * @rdev: radeon_device pointer
4645  *
4646  * Check which blocks are busy and return the relevant reset
4647  * mask to be used by cik_gpu_soft_reset().
4648  * Returns a mask of the blocks to be reset.
4649  */
4650 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4651 {
4652         u32 reset_mask = 0;
4653         u32 tmp;
4654
4655         /* GRBM_STATUS */
4656         tmp = RREG32(GRBM_STATUS);
4657         if (tmp & (PA_BUSY | SC_BUSY |
4658                    BCI_BUSY | SX_BUSY |
4659                    TA_BUSY | VGT_BUSY |
4660                    DB_BUSY | CB_BUSY |
4661                    GDS_BUSY | SPI_BUSY |
4662                    IA_BUSY | IA_BUSY_NO_DMA))
4663                 reset_mask |= RADEON_RESET_GFX;
4664
4665         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4666                 reset_mask |= RADEON_RESET_CP;
4667
4668         /* GRBM_STATUS2 */
4669         tmp = RREG32(GRBM_STATUS2);
4670         if (tmp & RLC_BUSY)
4671                 reset_mask |= RADEON_RESET_RLC;
4672
4673         /* SDMA0_STATUS_REG */
4674         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4675         if (!(tmp & SDMA_IDLE))
4676                 reset_mask |= RADEON_RESET_DMA;
4677
4678         /* SDMA1_STATUS_REG */
4679         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4680         if (!(tmp & SDMA_IDLE))
4681                 reset_mask |= RADEON_RESET_DMA1;
4682
4683         /* SRBM_STATUS2 */
4684         tmp = RREG32(SRBM_STATUS2);
4685         if (tmp & SDMA_BUSY)
4686                 reset_mask |= RADEON_RESET_DMA;
4687
4688         if (tmp & SDMA1_BUSY)
4689                 reset_mask |= RADEON_RESET_DMA1;
4690
4691         /* SRBM_STATUS */
4692         tmp = RREG32(SRBM_STATUS);
4693
4694         if (tmp & IH_BUSY)
4695                 reset_mask |= RADEON_RESET_IH;
4696
4697         if (tmp & SEM_BUSY)
4698                 reset_mask |= RADEON_RESET_SEM;
4699
4700         if (tmp & GRBM_RQ_PENDING)
4701                 reset_mask |= RADEON_RESET_GRBM;
4702
4703         if (tmp & VMC_BUSY)
4704                 reset_mask |= RADEON_RESET_VMC;
4705
4706         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4707                    MCC_BUSY | MCD_BUSY))
4708                 reset_mask |= RADEON_RESET_MC;
4709
4710         if (evergreen_is_display_hung(rdev))
4711                 reset_mask |= RADEON_RESET_DISPLAY;
4712
4713         /* Skip MC reset as it's mostly likely not hung, just busy */
4714         if (reset_mask & RADEON_RESET_MC) {
4715                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4716                 reset_mask &= ~RADEON_RESET_MC;
4717         }
4718
4719         return reset_mask;
4720 }
4721
4722 /**
4723  * cik_gpu_soft_reset - soft reset GPU
4724  *
4725  * @rdev: radeon_device pointer
4726  * @reset_mask: mask of which blocks to reset
4727  *
4728  * Soft reset the blocks specified in @reset_mask.
4729  */
4730 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4731 {
4732         struct evergreen_mc_save save;
4733         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4734         u32 tmp;
4735
4736         if (reset_mask == 0)
4737                 return;
4738
4739         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4740
4741         cik_print_gpu_status_regs(rdev);
4742         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4743                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4744         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4745                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4746
4747         /* disable CG/PG */
4748         cik_fini_pg(rdev);
4749         cik_fini_cg(rdev);
4750
4751         /* stop the rlc */
4752         cik_rlc_stop(rdev);
4753
4754         /* Disable GFX parsing/prefetching */
4755         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4756
4757         /* Disable MEC parsing/prefetching */
4758         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4759
4760         if (reset_mask & RADEON_RESET_DMA) {
4761                 /* sdma0 */
4762                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4763                 tmp |= SDMA_HALT;
4764                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4765         }
4766         if (reset_mask & RADEON_RESET_DMA1) {
4767                 /* sdma1 */
4768                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4769                 tmp |= SDMA_HALT;
4770                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4771         }
4772
4773         evergreen_mc_stop(rdev, &save);
4774         if (evergreen_mc_wait_for_idle(rdev)) {
4775                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4776         }
4777
4778         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4779                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4780
4781         if (reset_mask & RADEON_RESET_CP) {
4782                 grbm_soft_reset |= SOFT_RESET_CP;
4783
4784                 srbm_soft_reset |= SOFT_RESET_GRBM;
4785         }
4786
4787         if (reset_mask & RADEON_RESET_DMA)
4788                 srbm_soft_reset |= SOFT_RESET_SDMA;
4789
4790         if (reset_mask & RADEON_RESET_DMA1)
4791                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4792
4793         if (reset_mask & RADEON_RESET_DISPLAY)
4794                 srbm_soft_reset |= SOFT_RESET_DC;
4795
4796         if (reset_mask & RADEON_RESET_RLC)
4797                 grbm_soft_reset |= SOFT_RESET_RLC;
4798
4799         if (reset_mask & RADEON_RESET_SEM)
4800                 srbm_soft_reset |= SOFT_RESET_SEM;
4801
4802         if (reset_mask & RADEON_RESET_IH)
4803                 srbm_soft_reset |= SOFT_RESET_IH;
4804
4805         if (reset_mask & RADEON_RESET_GRBM)
4806                 srbm_soft_reset |= SOFT_RESET_GRBM;
4807
4808         if (reset_mask & RADEON_RESET_VMC)
4809                 srbm_soft_reset |= SOFT_RESET_VMC;
4810
4811         if (!(rdev->flags & RADEON_IS_IGP)) {
4812                 if (reset_mask & RADEON_RESET_MC)
4813                         srbm_soft_reset |= SOFT_RESET_MC;
4814         }
4815
4816         if (grbm_soft_reset) {
4817                 tmp = RREG32(GRBM_SOFT_RESET);
4818                 tmp |= grbm_soft_reset;
4819                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4820                 WREG32(GRBM_SOFT_RESET, tmp);
4821                 tmp = RREG32(GRBM_SOFT_RESET);
4822
4823                 udelay(50);
4824
4825                 tmp &= ~grbm_soft_reset;
4826                 WREG32(GRBM_SOFT_RESET, tmp);
4827                 tmp = RREG32(GRBM_SOFT_RESET);
4828         }
4829
4830         if (srbm_soft_reset) {
4831                 tmp = RREG32(SRBM_SOFT_RESET);
4832                 tmp |= srbm_soft_reset;
4833                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4834                 WREG32(SRBM_SOFT_RESET, tmp);
4835                 tmp = RREG32(SRBM_SOFT_RESET);
4836
4837                 udelay(50);
4838
4839                 tmp &= ~srbm_soft_reset;
4840                 WREG32(SRBM_SOFT_RESET, tmp);
4841                 tmp = RREG32(SRBM_SOFT_RESET);
4842         }
4843
4844         /* Wait a little for things to settle down */
4845         udelay(50);
4846
4847         evergreen_mc_resume(rdev, &save);
4848         udelay(50);
4849
4850         cik_print_gpu_status_regs(rdev);
4851 }
4852
4853 /**
4854  * cik_asic_reset - soft reset GPU
4855  *
4856  * @rdev: radeon_device pointer
4857  *
4858  * Look up which blocks are hung and attempt
4859  * to reset them.
4860  * Returns 0 for success.
4861  */
4862 int cik_asic_reset(struct radeon_device *rdev)
4863 {
4864         u32 reset_mask;
4865
4866         reset_mask = cik_gpu_check_soft_reset(rdev);
4867
4868         if (reset_mask)
4869                 r600_set_bios_scratch_engine_hung(rdev, true);
4870
4871         cik_gpu_soft_reset(rdev, reset_mask);
4872
4873         reset_mask = cik_gpu_check_soft_reset(rdev);
4874
4875         if (!reset_mask)
4876                 r600_set_bios_scratch_engine_hung(rdev, false);
4877
4878         return 0;
4879 }
4880
4881 /**
4882  * cik_gfx_is_lockup - check if the 3D engine is locked up
4883  *
4884  * @rdev: radeon_device pointer
4885  * @ring: radeon_ring structure holding ring information
4886  *
4887  * Check if the 3D engine is locked up (CIK).
4888  * Returns true if the engine is locked, false if not.
4889  */
4890 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4891 {
4892         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4893
4894         if (!(reset_mask & (RADEON_RESET_GFX |
4895                             RADEON_RESET_COMPUTE |
4896                             RADEON_RESET_CP))) {
4897                 radeon_ring_lockup_update(ring);
4898                 return false;
4899         }
4900         /* force CP activities */
4901         radeon_ring_force_activity(rdev, ring);
4902         return radeon_ring_test_lockup(rdev, ring);
4903 }
4904
4905 /* MC */
4906 /**
4907  * cik_mc_program - program the GPU memory controller
4908  *
4909  * @rdev: radeon_device pointer
4910  *
4911  * Set the location of vram, gart, and AGP in the GPU's
4912  * physical address space (CIK).
4913  */
4914 static void cik_mc_program(struct radeon_device *rdev)
4915 {
4916         struct evergreen_mc_save save;
4917         u32 tmp;
4918         int i, j;
4919
4920         /* Initialize HDP */
4921         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4922                 WREG32((0x2c14 + j), 0x00000000);
4923                 WREG32((0x2c18 + j), 0x00000000);
4924                 WREG32((0x2c1c + j), 0x00000000);
4925                 WREG32((0x2c20 + j), 0x00000000);
4926                 WREG32((0x2c24 + j), 0x00000000);
4927         }
4928         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4929
4930         evergreen_mc_stop(rdev, &save);
4931         if (radeon_mc_wait_for_idle(rdev)) {
4932                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4933         }
4934         /* Lockout access through VGA aperture*/
4935         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4936         /* Update configuration */
4937         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4938                rdev->mc.vram_start >> 12);
4939         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4940                rdev->mc.vram_end >> 12);
4941         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4942                rdev->vram_scratch.gpu_addr >> 12);
4943         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4944         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4945         WREG32(MC_VM_FB_LOCATION, tmp);
4946         /* XXX double check these! */
4947         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4948         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4949         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4950         WREG32(MC_VM_AGP_BASE, 0);
4951         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4952         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4953         if (radeon_mc_wait_for_idle(rdev)) {
4954                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4955         }
4956         evergreen_mc_resume(rdev, &save);
4957         /* we need to own VRAM, so turn off the VGA renderer here
4958          * to stop it overwriting our objects */
4959         rv515_vga_render_disable(rdev);
4960 }
4961
4962 /**
4963  * cik_mc_init - initialize the memory controller driver params
4964  *
4965  * @rdev: radeon_device pointer
4966  *
4967  * Look up the amount of vram, vram width, and decide how to place
4968  * vram and gart within the GPU's physical address space (CIK).
4969  * Returns 0 for success.
4970  */
4971 static int cik_mc_init(struct radeon_device *rdev)
4972 {
4973         u32 tmp;
4974         int chansize, numchan;
4975
4976         /* Get VRAM informations */
4977         rdev->mc.vram_is_ddr = true;
4978         tmp = RREG32(MC_ARB_RAMCFG);
4979         if (tmp & CHANSIZE_MASK) {
4980                 chansize = 64;
4981         } else {
4982                 chansize = 32;
4983         }
4984         tmp = RREG32(MC_SHARED_CHMAP);
4985         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4986         case 0:
4987         default:
4988                 numchan = 1;
4989                 break;
4990         case 1:
4991                 numchan = 2;
4992                 break;
4993         case 2:
4994                 numchan = 4;
4995                 break;
4996         case 3:
4997                 numchan = 8;
4998                 break;
4999         case 4:
5000                 numchan = 3;
5001                 break;
5002         case 5:
5003                 numchan = 6;
5004                 break;
5005         case 6:
5006                 numchan = 10;
5007                 break;
5008         case 7:
5009                 numchan = 12;
5010                 break;
5011         case 8:
5012                 numchan = 16;
5013                 break;
5014         }
5015         rdev->mc.vram_width = numchan * chansize;
5016         /* Could aper size report 0 ? */
5017         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5018         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5019         /* size in MB on si */
5020         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5021         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5022         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5023         si_vram_gtt_location(rdev, &rdev->mc);
5024         radeon_update_bandwidth_info(rdev);
5025
5026         return 0;
5027 }
5028
/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP cache, then request a TLB invalidate for the
 * VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
        /* flush hdp cache */
        WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

        /* bits 0-15 are the VM contexts0-15; 0x1 selects context 0 only */
        WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5050
5051 /**
5052  * cik_pcie_gart_enable - gart enable
5053  *
5054  * @rdev: radeon_device pointer
5055  *
5056  * This sets up the TLBs, programs the page tables for VMID0,
5057  * sets up the hw for VMIDs 1-15 which are allocated on
5058  * demand, and sets up the global locations for the LDS, GDS,
5059  * and GPUVM for FSA64 clients (CIK).
5060  * Returns 0 for success, errors for failure.
5061  */
5062 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5063 {
5064         int r, i;
5065
5066         if (rdev->gart.robj == NULL) {
5067                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5068                 return -EINVAL;
5069         }
5070         r = radeon_gart_table_vram_pin(rdev);
5071         if (r)
5072                 return r;
5073         radeon_gart_restore(rdev);
5074         /* Setup TLB control */
5075         WREG32(MC_VM_MX_L1_TLB_CNTL,
5076                (0xA << 7) |
5077                ENABLE_L1_TLB |
5078                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5079                ENABLE_ADVANCED_DRIVER_MODEL |
5080                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5081         /* Setup L2 cache */
5082         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5083                ENABLE_L2_FRAGMENT_PROCESSING |
5084                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5085                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5086                EFFECTIVE_L2_QUEUE_SIZE(7) |
5087                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5088         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5089         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5090                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5091         /* setup context0 */
5092         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5093         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5094         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5095         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5096                         (u32)(rdev->dummy_page.addr >> 12));
5097         WREG32(VM_CONTEXT0_CNTL2, 0);
5098         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5099                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5100
5101         WREG32(0x15D4, 0);
5102         WREG32(0x15D8, 0);
5103         WREG32(0x15DC, 0);
5104
5105         /* empty context1-15 */
5106         /* FIXME start with 4G, once using 2 level pt switch to full
5107          * vm size space
5108          */
5109         /* set vm size, must be a multiple of 4 */
5110         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5111         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5112         for (i = 1; i < 16; i++) {
5113                 if (i < 8)
5114                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5115                                rdev->gart.table_addr >> 12);
5116                 else
5117                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5118                                rdev->gart.table_addr >> 12);
5119         }
5120
5121         /* enable context1-15 */
5122         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5123                (u32)(rdev->dummy_page.addr >> 12));
5124         WREG32(VM_CONTEXT1_CNTL2, 4);
5125         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5126                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5127                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5128                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5129                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5130                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5131                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5132                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5133                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5134                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5135                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5136                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5137                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5138
5139         /* TC cache setup ??? */
5140         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
5141         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
5142         WREG32(TC_CFG_L1_STORE_POLICY, 0);
5143
5144         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
5145         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
5146         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
5147         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
5148         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
5149
5150         WREG32(TC_CFG_L1_VOLATILE, 0);
5151         WREG32(TC_CFG_L2_VOLATILE, 0);
5152
5153         if (rdev->family == CHIP_KAVERI) {
5154                 u32 tmp = RREG32(CHUB_CONTROL);
5155                 tmp &= ~BYPASS_VM;
5156                 WREG32(CHUB_CONTROL, tmp);
5157         }
5158
5159         /* XXX SH_MEM regs */
5160         /* where to put LDS, scratch, GPUVM in FSA64 space */
5161         mutex_lock(&rdev->srbm_mutex);
5162         for (i = 0; i < 16; i++) {
5163                 cik_srbm_select(rdev, 0, 0, 0, i);
5164                 /* CP and shaders */
5165                 WREG32(SH_MEM_CONFIG, 0);
5166                 WREG32(SH_MEM_APE1_BASE, 1);
5167                 WREG32(SH_MEM_APE1_LIMIT, 0);
5168                 WREG32(SH_MEM_BASES, 0);
5169                 /* SDMA GFX */
5170                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5171                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5172                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5173                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5174                 /* XXX SDMA RLC - todo */
5175         }
5176         cik_srbm_select(rdev, 0, 0, 0, 0);
5177         mutex_unlock(&rdev->srbm_mutex);
5178
5179         cik_pcie_gart_tlb_flush(rdev);
5180         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5181                  (unsigned)(rdev->mc.gtt_size >> 20),
5182                  (unsigned long long)rdev->gart.table_addr);
5183         rdev->gart.ready = true;
5184         return 0;
5185 }
5186
5187 /**
5188  * cik_pcie_gart_disable - gart disable
5189  *
5190  * @rdev: radeon_device pointer
5191  *
5192  * This disables all VM page table (CIK).
5193  */
5194 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5195 {
5196         /* Disable all tables */
5197         WREG32(VM_CONTEXT0_CNTL, 0);
5198         WREG32(VM_CONTEXT1_CNTL, 0);
5199         /* Setup TLB control */
5200         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5201                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5202         /* Setup L2 cache */
5203         WREG32(VM_L2_CNTL,
5204                ENABLE_L2_FRAGMENT_PROCESSING |
5205                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5206                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5207                EFFECTIVE_L2_QUEUE_SIZE(7) |
5208                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5209         WREG32(VM_L2_CNTL2, 0);
5210         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5211                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5212         radeon_gart_table_vram_unpin(rdev);
5213 }
5214
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): disables the GART in
 * hardware, frees the GART table in VRAM, then finalizes the common
 * GART state.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
        /* the hardware must stop using the table before it is freed */
        cik_pcie_gart_disable(rdev);
        radeon_gart_table_vram_free(rdev);
        radeon_gart_fini(rdev);
}
5228
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer pointer (unused)
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
        /* nothing to validate in software */
        return 0;
}
5242
5243 /*
5244  * vm
5245  * VMID 0 is the physical GPU addresses as used by the kernel.
5246  * VMIDs 1-15 are used for userspace clients and are handled
5247  * by the radeon vm/hsa code.
5248  */
5249 /**
5250  * cik_vm_init - cik vm init callback
5251  *
5252  * @rdev: radeon_device pointer
5253  *
5254  * Inits cik specific vm parameters (number of VMs, base of vram for
5255  * VMIDs 1-15) (CIK).
5256  * Returns 0 for success.
5257  */
5258 int cik_vm_init(struct radeon_device *rdev)
5259 {
5260         /* number of VMs */
5261         rdev->vm_manager.nvm = 16;
5262         /* base offset of vram pages */
5263         if (rdev->flags & RADEON_IS_IGP) {
5264                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5265                 tmp <<= 22;
5266                 rdev->vm_manager.vram_base_offset = tmp;
5267         } else
5268                 rdev->vm_manager.vram_base_offset = 0;
5269
5270         return 0;
5271 }
5272
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Currently an empty function: there is nothing to undo here.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5283
5284 /**
5285  * cik_vm_decode_fault - print human readable fault info
5286  *
5287  * @rdev: radeon_device pointer
5288  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5289  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5290  *
5291  * Print human readable fault information (CIK).
5292  */
5293 static void cik_vm_decode_fault(struct radeon_device *rdev,
5294                                 u32 status, u32 addr, u32 mc_client)
5295 {
5296         u32 mc_id;
5297         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5298         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5299         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5300                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5301
5302         if (rdev->family == CHIP_HAWAII)
5303                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5304         else
5305                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5306
5307         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5308                protections, vmid, addr,
5309                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5310                block, mc_client, mc_id);
5311 }
5312
5313 /**
5314  * cik_vm_flush - cik vm flush using the CP
5315  *
5316  * @rdev: radeon_device pointer
5317  *
5318  * Update the page table base and flush the VM TLB
5319  * using the CP (CIK).
5320  */
5321 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5322 {
5323         struct radeon_ring *ring = &rdev->ring[ridx];
5324
5325         if (vm == NULL)
5326                 return;
5327
5328         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5329         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5330                                  WRITE_DATA_DST_SEL(0)));
5331         if (vm->id < 8) {
5332                 radeon_ring_write(ring,
5333                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5334         } else {
5335                 radeon_ring_write(ring,
5336                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5337         }
5338         radeon_ring_write(ring, 0);
5339         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5340
5341         /* update SH_MEM_* regs */
5342         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5343         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5344                                  WRITE_DATA_DST_SEL(0)));
5345         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5346         radeon_ring_write(ring, 0);
5347         radeon_ring_write(ring, VMID(vm->id));
5348
5349         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5350         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5351                                  WRITE_DATA_DST_SEL(0)));
5352         radeon_ring_write(ring, SH_MEM_BASES >> 2);
5353         radeon_ring_write(ring, 0);
5354
5355         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5356         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5357         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5358         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5359
5360         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5361         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5362                                  WRITE_DATA_DST_SEL(0)));
5363         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5364         radeon_ring_write(ring, 0);
5365         radeon_ring_write(ring, VMID(0));
5366
5367         /* HDP flush */
5368         /* We should be using the WAIT_REG_MEM packet here like in
5369          * cik_fence_ring_emit(), but it causes the CP to hang in this
5370          * context...
5371          */
5372         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5373         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5374                                  WRITE_DATA_DST_SEL(0)));
5375         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5376         radeon_ring_write(ring, 0);
5377         radeon_ring_write(ring, 0);
5378
5379         /* bits 0-15 are the VM contexts0-15 */
5380         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5381         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5382                                  WRITE_DATA_DST_SEL(0)));
5383         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5384         radeon_ring_write(ring, 0);
5385         radeon_ring_write(ring, 1 << vm->id);
5386
5387         /* compute doesn't have PFP */
5388         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5389                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5390                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5391                 radeon_ring_write(ring, 0x0);
5392         }
5393 }
5394
5395 /*
5396  * RLC
5397  * The RLC is a multi-purpose microengine that handles a
5398  * variety of functions, the most important of which is
5399  * the interrupt controller.
5400  */
5401 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5402                                           bool enable)
5403 {
5404         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5405
5406         if (enable)
5407                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5408         else
5409                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5410         WREG32(CP_INT_CNTL_RING0, tmp);
5411 }
5412
5413 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5414 {
5415         u32 tmp;
5416
5417         tmp = RREG32(RLC_LB_CNTL);
5418         if (enable)
5419                 tmp |= LOAD_BALANCE_ENABLE;
5420         else
5421                 tmp &= ~LOAD_BALANCE_ENABLE;
5422         WREG32(RLC_LB_CNTL, tmp);
5423 }
5424
5425 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5426 {
5427         u32 i, j, k;
5428         u32 mask;
5429
5430         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5431                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5432                         cik_select_se_sh(rdev, i, j);
5433                         for (k = 0; k < rdev->usec_timeout; k++) {
5434                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5435                                         break;
5436                                 udelay(1);
5437                         }
5438                 }
5439         }
5440         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5441
5442         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5443         for (k = 0; k < rdev->usec_timeout; k++) {
5444                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5445                         break;
5446                 udelay(1);
5447         }
5448 }
5449
5450 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5451 {
5452         u32 tmp;
5453
5454         tmp = RREG32(RLC_CNTL);
5455         if (tmp != rlc)
5456                 WREG32(RLC_CNTL, rlc);
5457 }
5458
5459 static u32 cik_halt_rlc(struct radeon_device *rdev)
5460 {
5461         u32 data, orig;
5462
5463         orig = data = RREG32(RLC_CNTL);
5464
5465         if (data & RLC_ENABLE) {
5466                 u32 i;
5467
5468                 data &= ~RLC_ENABLE;
5469                 WREG32(RLC_CNTL, data);
5470
5471                 for (i = 0; i < rdev->usec_timeout; i++) {
5472                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5473                                 break;
5474                         udelay(1);
5475                 }
5476
5477                 cik_wait_for_rlc_serdes(rdev);
5478         }
5479
5480         return orig;
5481 }
5482
5483 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5484 {
5485         u32 tmp, i, mask;
5486
5487         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5488         WREG32(RLC_GPR_REG2, tmp);
5489
5490         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5491         for (i = 0; i < rdev->usec_timeout; i++) {
5492                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5493                         break;
5494                 udelay(1);
5495         }
5496
5497         for (i = 0; i < rdev->usec_timeout; i++) {
5498                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5499                         break;
5500                 udelay(1);
5501         }
5502 }
5503
5504 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5505 {
5506         u32 tmp;
5507
5508         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5509         WREG32(RLC_GPR_REG2, tmp);
5510 }
5511
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK): clear RLC_CNTL, disable the
 * gui idle interrupt and wait for the RLC serdes to go idle.
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
        /* writing 0 clears every bit in RLC_CNTL, including RLC_ENABLE */
        WREG32(RLC_CNTL, 0);

        cik_enable_gui_idle_interrupt(rdev, false);

        cik_wait_for_rlc_serdes(rdev);
}
5527
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK): set RLC_CNTL to RLC_ENABLE,
 * enable the gui idle interrupt and delay 50us.
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
        /* RLC_ENABLE is the only bit left set in RLC_CNTL */
        WREG32(RLC_CNTL, RLC_ENABLE);

        cik_enable_gui_idle_interrupt(rdev, true);

        udelay(50);
}
5543
5544 /**
5545  * cik_rlc_resume - setup the RLC hw
5546  *
5547  * @rdev: radeon_device pointer
5548  *
5549  * Initialize the RLC registers, load the ucode,
5550  * and start the RLC (CIK).
5551  * Returns 0 for success, -EINVAL if the ucode is not available.
5552  */
5553 static int cik_rlc_resume(struct radeon_device *rdev)
5554 {
5555         u32 i, size, tmp;
5556         const __be32 *fw_data;
5557
5558         if (!rdev->rlc_fw)
5559                 return -EINVAL;
5560
5561         switch (rdev->family) {
5562         case CHIP_BONAIRE:
5563         case CHIP_HAWAII:
5564         default:
5565                 size = BONAIRE_RLC_UCODE_SIZE;
5566                 break;
5567         case CHIP_KAVERI:
5568                 size = KV_RLC_UCODE_SIZE;
5569                 break;
5570         case CHIP_KABINI:
5571                 size = KB_RLC_UCODE_SIZE;
5572                 break;
5573         }
5574
5575         cik_rlc_stop(rdev);
5576
5577         /* disable CG */
5578         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5579         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5580
5581         si_rlc_reset(rdev);
5582
5583         cik_init_pg(rdev);
5584
5585         cik_init_cg(rdev);
5586
5587         WREG32(RLC_LB_CNTR_INIT, 0);
5588         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5589
5590         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5591         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5592         WREG32(RLC_LB_PARAMS, 0x00600408);
5593         WREG32(RLC_LB_CNTL, 0x80000004);
5594
5595         WREG32(RLC_MC_CNTL, 0);
5596         WREG32(RLC_UCODE_CNTL, 0);
5597
5598         fw_data = (const __be32 *)rdev->rlc_fw->data;
5599                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5600         for (i = 0; i < size; i++)
5601                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5602         WREG32(RLC_GPM_UCODE_ADDR, 0);
5603
5604         /* XXX - find out what chips support lbpw */
5605         cik_enable_lbpw(rdev, false);
5606
5607         if (rdev->family == CHIP_BONAIRE)
5608                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5609
5610         cik_rlc_start(rdev);
5611
5612         return 0;
5613 }
5614
5615 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5616 {
5617         u32 data, orig, tmp, tmp2;
5618
5619         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5620
5621         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5622                 cik_enable_gui_idle_interrupt(rdev, true);
5623
5624                 tmp = cik_halt_rlc(rdev);
5625
5626                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5627                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5628                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5629                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5630                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5631
5632                 cik_update_rlc(rdev, tmp);
5633
5634                 data |= CGCG_EN | CGLS_EN;
5635         } else {
5636                 cik_enable_gui_idle_interrupt(rdev, false);
5637
5638                 RREG32(CB_CGTT_SCLK_CTRL);
5639                 RREG32(CB_CGTT_SCLK_CTRL);
5640                 RREG32(CB_CGTT_SCLK_CTRL);
5641                 RREG32(CB_CGTT_SCLK_CTRL);
5642
5643                 data &= ~(CGCG_EN | CGLS_EN);
5644         }
5645
5646         if (orig != data)
5647                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5648
5649 }
5650
5651 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5652 {
5653         u32 data, orig, tmp = 0;
5654
5655         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5656                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5657                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5658                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
5659                                 data |= CP_MEM_LS_EN;
5660                                 if (orig != data)
5661                                         WREG32(CP_MEM_SLP_CNTL, data);
5662                         }
5663                 }
5664
5665                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5666                 data &= 0xfffffffd;
5667                 if (orig != data)
5668                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5669
5670                 tmp = cik_halt_rlc(rdev);
5671
5672                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5673                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5674                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5675                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5676                 WREG32(RLC_SERDES_WR_CTRL, data);
5677
5678                 cik_update_rlc(rdev, tmp);
5679
5680                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5681                         orig = data = RREG32(CGTS_SM_CTRL_REG);
5682                         data &= ~SM_MODE_MASK;
5683                         data |= SM_MODE(0x2);
5684                         data |= SM_MODE_ENABLE;
5685                         data &= ~CGTS_OVERRIDE;
5686                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5687                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5688                                 data &= ~CGTS_LS_OVERRIDE;
5689                         data &= ~ON_MONITOR_ADD_MASK;
5690                         data |= ON_MONITOR_ADD_EN;
5691                         data |= ON_MONITOR_ADD(0x96);
5692                         if (orig != data)
5693                                 WREG32(CGTS_SM_CTRL_REG, data);
5694                 }
5695         } else {
5696                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5697                 data |= 0x00000002;
5698                 if (orig != data)
5699                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5700
5701                 data = RREG32(RLC_MEM_SLP_CNTL);
5702                 if (data & RLC_MEM_LS_EN) {
5703                         data &= ~RLC_MEM_LS_EN;
5704                         WREG32(RLC_MEM_SLP_CNTL, data);
5705                 }
5706
5707                 data = RREG32(CP_MEM_SLP_CNTL);
5708                 if (data & CP_MEM_LS_EN) {
5709                         data &= ~CP_MEM_LS_EN;
5710                         WREG32(CP_MEM_SLP_CNTL, data);
5711                 }
5712
5713                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5714                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5715                 if (orig != data)
5716                         WREG32(CGTS_SM_CTRL_REG, data);
5717
5718                 tmp = cik_halt_rlc(rdev);
5719
5720                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5721                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5722                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5723                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5724                 WREG32(RLC_SERDES_WR_CTRL, data);
5725
5726                 cik_update_rlc(rdev, tmp);
5727         }
5728 }
5729
5730 static const u32 mc_cg_registers[] =
5731 {
5732         MC_HUB_MISC_HUB_CG,
5733         MC_HUB_MISC_SIP_CG,
5734         MC_HUB_MISC_VM_CG,
5735         MC_XPB_CLK_GAT,
5736         ATC_MISC_CG,
5737         MC_CITF_MISC_WR_CG,
5738         MC_CITF_MISC_RD_CG,
5739         MC_CITF_MISC_VM_CG,
5740         VM_L2_CG,
5741 };
5742
5743 static void cik_enable_mc_ls(struct radeon_device *rdev,
5744                              bool enable)
5745 {
5746         int i;
5747         u32 orig, data;
5748
5749         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5750                 orig = data = RREG32(mc_cg_registers[i]);
5751                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5752                         data |= MC_LS_ENABLE;
5753                 else
5754                         data &= ~MC_LS_ENABLE;
5755                 if (data != orig)
5756                         WREG32(mc_cg_registers[i], data);
5757         }
5758 }
5759
5760 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5761                                bool enable)
5762 {
5763         int i;
5764         u32 orig, data;
5765
5766         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5767                 orig = data = RREG32(mc_cg_registers[i]);
5768                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5769                         data |= MC_CG_ENABLE;
5770                 else
5771                         data &= ~MC_CG_ENABLE;
5772                 if (data != orig)
5773                         WREG32(mc_cg_registers[i], data);
5774         }
5775 }
5776
5777 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5778                                  bool enable)
5779 {
5780         u32 orig, data;
5781
5782         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5783                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5784                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5785         } else {
5786                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5787                 data |= 0xff000000;
5788                 if (data != orig)
5789                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5790
5791                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5792                 data |= 0xff000000;
5793                 if (data != orig)
5794                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5795         }
5796 }
5797
5798 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5799                                  bool enable)
5800 {
5801         u32 orig, data;
5802
5803         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5804                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5805                 data |= 0x100;
5806                 if (orig != data)
5807                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5808
5809                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5810                 data |= 0x100;
5811                 if (orig != data)
5812                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5813         } else {
5814                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5815                 data &= ~0x100;
5816                 if (orig != data)
5817                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5818
5819                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5820                 data &= ~0x100;
5821                 if (orig != data)
5822                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5823         }
5824 }
5825
5826 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5827                                 bool enable)
5828 {
5829         u32 orig, data;
5830
5831         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5832                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5833                 data = 0xfff;
5834                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5835
5836                 orig = data = RREG32(UVD_CGC_CTRL);
5837                 data |= DCM;
5838                 if (orig != data)
5839                         WREG32(UVD_CGC_CTRL, data);
5840         } else {
5841                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5842                 data &= ~0xfff;
5843                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5844
5845                 orig = data = RREG32(UVD_CGC_CTRL);
5846                 data &= ~DCM;
5847                 if (orig != data)
5848                         WREG32(UVD_CGC_CTRL, data);
5849         }
5850 }
5851
5852 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5853                                bool enable)
5854 {
5855         u32 orig, data;
5856
5857         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5858
5859         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5860                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5861                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5862         else
5863                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5864                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5865
5866         if (orig != data)
5867                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5868 }
5869
5870 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5871                                 bool enable)
5872 {
5873         u32 orig, data;
5874
5875         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5876
5877         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5878                 data &= ~CLOCK_GATING_DIS;
5879         else
5880                 data |= CLOCK_GATING_DIS;
5881
5882         if (orig != data)
5883                 WREG32(HDP_HOST_PATH_CNTL, data);
5884 }
5885
5886 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5887                               bool enable)
5888 {
5889         u32 orig, data;
5890
5891         orig = data = RREG32(HDP_MEM_POWER_LS);
5892
5893         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5894                 data |= HDP_LS_ENABLE;
5895         else
5896                 data &= ~HDP_LS_ENABLE;
5897
5898         if (orig != data)
5899                 WREG32(HDP_MEM_POWER_LS, data);
5900 }
5901
5902 void cik_update_cg(struct radeon_device *rdev,
5903                    u32 block, bool enable)
5904 {
5905
5906         if (block & RADEON_CG_BLOCK_GFX) {
5907                 cik_enable_gui_idle_interrupt(rdev, false);
5908                 /* order matters! */
5909                 if (enable) {
5910                         cik_enable_mgcg(rdev, true);
5911                         cik_enable_cgcg(rdev, true);
5912                 } else {
5913                         cik_enable_cgcg(rdev, false);
5914                         cik_enable_mgcg(rdev, false);
5915                 }
5916                 cik_enable_gui_idle_interrupt(rdev, true);
5917         }
5918
5919         if (block & RADEON_CG_BLOCK_MC) {
5920                 if (!(rdev->flags & RADEON_IS_IGP)) {
5921                         cik_enable_mc_mgcg(rdev, enable);
5922                         cik_enable_mc_ls(rdev, enable);
5923                 }
5924         }
5925
5926         if (block & RADEON_CG_BLOCK_SDMA) {
5927                 cik_enable_sdma_mgcg(rdev, enable);
5928                 cik_enable_sdma_mgls(rdev, enable);
5929         }
5930
5931         if (block & RADEON_CG_BLOCK_BIF) {
5932                 cik_enable_bif_mgls(rdev, enable);
5933         }
5934
5935         if (block & RADEON_CG_BLOCK_UVD) {
5936                 if (rdev->has_uvd)
5937                         cik_enable_uvd_mgcg(rdev, enable);
5938         }
5939
5940         if (block & RADEON_CG_BLOCK_HDP) {
5941                 cik_enable_hdp_mgcg(rdev, enable);
5942                 cik_enable_hdp_ls(rdev, enable);
5943         }
5944 }
5945
5946 static void cik_init_cg(struct radeon_device *rdev)
5947 {
5948
5949         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5950
5951         if (rdev->has_uvd)
5952                 si_init_uvd_internal_cg(rdev);
5953
5954         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5955                              RADEON_CG_BLOCK_SDMA |
5956                              RADEON_CG_BLOCK_BIF |
5957                              RADEON_CG_BLOCK_UVD |
5958                              RADEON_CG_BLOCK_HDP), true);
5959 }
5960
5961 static void cik_fini_cg(struct radeon_device *rdev)
5962 {
5963         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5964                              RADEON_CG_BLOCK_SDMA |
5965                              RADEON_CG_BLOCK_BIF |
5966                              RADEON_CG_BLOCK_UVD |
5967                              RADEON_CG_BLOCK_HDP), false);
5968
5969         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
5970 }
5971
5972 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5973                                           bool enable)
5974 {
5975         u32 data, orig;
5976
5977         orig = data = RREG32(RLC_PG_CNTL);
5978         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5979                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5980         else
5981                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5982         if (orig != data)
5983                 WREG32(RLC_PG_CNTL, data);
5984 }
5985
5986 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5987                                           bool enable)
5988 {
5989         u32 data, orig;
5990
5991         orig = data = RREG32(RLC_PG_CNTL);
5992         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5993                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5994         else
5995                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5996         if (orig != data)
5997                 WREG32(RLC_PG_CNTL, data);
5998 }
5999
6000 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6001 {
6002         u32 data, orig;
6003
6004         orig = data = RREG32(RLC_PG_CNTL);
6005         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6006                 data &= ~DISABLE_CP_PG;
6007         else
6008                 data |= DISABLE_CP_PG;
6009         if (orig != data)
6010                 WREG32(RLC_PG_CNTL, data);
6011 }
6012
6013 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6014 {
6015         u32 data, orig;
6016
6017         orig = data = RREG32(RLC_PG_CNTL);
6018         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6019                 data &= ~DISABLE_GDS_PG;
6020         else
6021                 data |= DISABLE_GDS_PG;
6022         if (orig != data)
6023                 WREG32(RLC_PG_CNTL, data);
6024 }
6025
6026 #define CP_ME_TABLE_SIZE    96
6027 #define CP_ME_TABLE_OFFSET  2048
6028 #define CP_MEC_TABLE_OFFSET 4096
6029
6030 void cik_init_cp_pg_table(struct radeon_device *rdev)
6031 {
6032         const __be32 *fw_data;
6033         volatile u32 *dst_ptr;
6034         int me, i, max_me = 4;
6035         u32 bo_offset = 0;
6036         u32 table_offset;
6037
6038         if (rdev->family == CHIP_KAVERI)
6039                 max_me = 5;
6040
6041         if (rdev->rlc.cp_table_ptr == NULL)
6042                 return;
6043
6044         /* write the cp table buffer */
6045         dst_ptr = rdev->rlc.cp_table_ptr;
6046         for (me = 0; me < max_me; me++) {
6047                 if (me == 0) {
6048                         fw_data = (const __be32 *)rdev->ce_fw->data;
6049                         table_offset = CP_ME_TABLE_OFFSET;
6050                 } else if (me == 1) {
6051                         fw_data = (const __be32 *)rdev->pfp_fw->data;
6052                         table_offset = CP_ME_TABLE_OFFSET;
6053                 } else if (me == 2) {
6054                         fw_data = (const __be32 *)rdev->me_fw->data;
6055                         table_offset = CP_ME_TABLE_OFFSET;
6056                 } else {
6057                         fw_data = (const __be32 *)rdev->mec_fw->data;
6058                         table_offset = CP_MEC_TABLE_OFFSET;
6059                 }
6060
6061                 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
6062                         dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6063                 }
6064                 bo_offset += CP_ME_TABLE_SIZE;
6065         }
6066 }
6067
6068 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6069                                 bool enable)
6070 {
6071         u32 data, orig;
6072
6073         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6074                 orig = data = RREG32(RLC_PG_CNTL);
6075                 data |= GFX_PG_ENABLE;
6076                 if (orig != data)
6077                         WREG32(RLC_PG_CNTL, data);
6078
6079                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6080                 data |= AUTO_PG_EN;
6081                 if (orig != data)
6082                         WREG32(RLC_AUTO_PG_CTRL, data);
6083         } else {
6084                 orig = data = RREG32(RLC_PG_CNTL);
6085                 data &= ~GFX_PG_ENABLE;
6086                 if (orig != data)
6087                         WREG32(RLC_PG_CNTL, data);
6088
6089                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6090                 data &= ~AUTO_PG_EN;
6091                 if (orig != data)
6092                         WREG32(RLC_AUTO_PG_CTRL, data);
6093
6094                 data = RREG32(DB_RENDER_CONTROL);
6095         }
6096 }
6097
6098 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6099 {
6100         u32 mask = 0, tmp, tmp1;
6101         int i;
6102
6103         cik_select_se_sh(rdev, se, sh);
6104         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6105         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6106         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6107
6108         tmp &= 0xffff0000;
6109
6110         tmp |= tmp1;
6111         tmp >>= 16;
6112
6113         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6114                 mask <<= 1;
6115                 mask |= 1;
6116         }
6117
6118         return (~tmp) & mask;
6119 }
6120
6121 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6122 {
6123         u32 i, j, k, active_cu_number = 0;
6124         u32 mask, counter, cu_bitmap;
6125         u32 tmp = 0;
6126
6127         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6128                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6129                         mask = 1;
6130                         cu_bitmap = 0;
6131                         counter = 0;
6132                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6133                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6134                                         if (counter < 2)
6135                                                 cu_bitmap |= mask;
6136                                         counter ++;
6137                                 }
6138                                 mask <<= 1;
6139                         }
6140
6141                         active_cu_number += counter;
6142                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6143                 }
6144         }
6145
6146         WREG32(RLC_PG_AO_CU_MASK, tmp);
6147
6148         tmp = RREG32(RLC_MAX_PG_CU);
6149         tmp &= ~MAX_PU_CU_MASK;
6150         tmp |= MAX_PU_CU(active_cu_number);
6151         WREG32(RLC_MAX_PG_CU, tmp);
6152 }
6153
6154 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6155                                        bool enable)
6156 {
6157         u32 data, orig;
6158
6159         orig = data = RREG32(RLC_PG_CNTL);
6160         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6161                 data |= STATIC_PER_CU_PG_ENABLE;
6162         else
6163                 data &= ~STATIC_PER_CU_PG_ENABLE;
6164         if (orig != data)
6165                 WREG32(RLC_PG_CNTL, data);
6166 }
6167
6168 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6169                                         bool enable)
6170 {
6171         u32 data, orig;
6172
6173         orig = data = RREG32(RLC_PG_CNTL);
6174         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6175                 data |= DYN_PER_CU_PG_ENABLE;
6176         else
6177                 data &= ~DYN_PER_CU_PG_ENABLE;
6178         if (orig != data)
6179                 WREG32(RLC_PG_CNTL, data);
6180 }
6181
6182 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6183 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6184
6185 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6186 {
6187         u32 data, orig;
6188         u32 i;
6189
6190         if (rdev->rlc.cs_data) {
6191                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6192                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6193                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6194                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6195         } else {
6196                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6197                 for (i = 0; i < 3; i++)
6198                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
6199         }
6200         if (rdev->rlc.reg_list) {
6201                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6202                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6203                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6204         }
6205
6206         orig = data = RREG32(RLC_PG_CNTL);
6207         data |= GFX_PG_SRC;
6208         if (orig != data)
6209                 WREG32(RLC_PG_CNTL, data);
6210
6211         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6212         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6213
6214         data = RREG32(CP_RB_WPTR_POLL_CNTL);
6215         data &= ~IDLE_POLL_COUNT_MASK;
6216         data |= IDLE_POLL_COUNT(0x60);
6217         WREG32(CP_RB_WPTR_POLL_CNTL, data);
6218
6219         data = 0x10101010;
6220         WREG32(RLC_PG_DELAY, data);
6221
6222         data = RREG32(RLC_PG_DELAY_2);
6223         data &= ~0xff;
6224         data |= 0x3;
6225         WREG32(RLC_PG_DELAY_2, data);
6226
6227         data = RREG32(RLC_AUTO_PG_CTRL);
6228         data &= ~GRBM_REG_SGIT_MASK;
6229         data |= GRBM_REG_SGIT(0x700);
6230         WREG32(RLC_AUTO_PG_CTRL, data);
6231
6232 }
6233
6234 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6235 {
6236         cik_enable_gfx_cgpg(rdev, enable);
6237         cik_enable_gfx_static_mgpg(rdev, enable);
6238         cik_enable_gfx_dynamic_mgpg(rdev, enable);
6239 }
6240
6241 u32 cik_get_csb_size(struct radeon_device *rdev)
6242 {
6243         u32 count = 0;
6244         const struct cs_section_def *sect = NULL;
6245         const struct cs_extent_def *ext = NULL;
6246
6247         if (rdev->rlc.cs_data == NULL)
6248                 return 0;
6249
6250         /* begin clear state */
6251         count += 2;
6252         /* context control state */
6253         count += 3;
6254
6255         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6256                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6257                         if (sect->id == SECT_CONTEXT)
6258                                 count += 2 + ext->reg_count;
6259                         else
6260                                 return 0;
6261                 }
6262         }
6263         /* pa_sc_raster_config/pa_sc_raster_config1 */
6264         count += 4;
6265         /* end clear state */
6266         count += 2;
6267         /* clear state */
6268         count += 2;
6269
6270         return count;
6271 }
6272
6273 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6274 {
6275         u32 count = 0, i;
6276         const struct cs_section_def *sect = NULL;
6277         const struct cs_extent_def *ext = NULL;
6278
6279         if (rdev->rlc.cs_data == NULL)
6280                 return;
6281         if (buffer == NULL)
6282                 return;
6283
6284         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6285         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6286
6287         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6288         buffer[count++] = cpu_to_le32(0x80000000);
6289         buffer[count++] = cpu_to_le32(0x80000000);
6290
6291         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6292                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6293                         if (sect->id == SECT_CONTEXT) {
6294                                 buffer[count++] =
6295                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6296                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6297                                 for (i = 0; i < ext->reg_count; i++)
6298                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6299                         } else {
6300                                 return;
6301                         }
6302                 }
6303         }
6304
6305         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6306         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6307         switch (rdev->family) {
6308         case CHIP_BONAIRE:
6309                 buffer[count++] = cpu_to_le32(0x16000012);
6310                 buffer[count++] = cpu_to_le32(0x00000000);
6311                 break;
6312         case CHIP_KAVERI:
6313                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6314                 buffer[count++] = cpu_to_le32(0x00000000);
6315                 break;
6316         case CHIP_KABINI:
6317                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6318                 buffer[count++] = cpu_to_le32(0x00000000);
6319                 break;
6320         case CHIP_HAWAII:
6321                 buffer[count++] = 0x3a00161a;
6322                 buffer[count++] = 0x0000002e;
6323                 break;
6324         default:
6325                 buffer[count++] = cpu_to_le32(0x00000000);
6326                 buffer[count++] = cpu_to_le32(0x00000000);
6327                 break;
6328         }
6329
6330         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6331         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6332
6333         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6334         buffer[count++] = cpu_to_le32(0);
6335 }
6336
6337 static void cik_init_pg(struct radeon_device *rdev)
6338 {
6339         if (rdev->pg_flags) {
6340                 cik_enable_sck_slowdown_on_pu(rdev, true);
6341                 cik_enable_sck_slowdown_on_pd(rdev, true);
6342                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6343                         cik_init_gfx_cgpg(rdev);
6344                         cik_enable_cp_pg(rdev, true);
6345                         cik_enable_gds_pg(rdev, true);
6346                 }
6347                 cik_init_ao_cu_mask(rdev);
6348                 cik_update_gfx_pg(rdev, true);
6349         }
6350 }
6351
6352 static void cik_fini_pg(struct radeon_device *rdev)
6353 {
6354         if (rdev->pg_flags) {
6355                 cik_update_gfx_pg(rdev, false);
6356                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6357                         cik_enable_cp_pg(rdev, false);
6358                         cik_enable_gds_pg(rdev, false);
6359                 }
6360         }
6361 }
6362
6363 /*
6364  * Interrupts
6365  * Starting with r6xx, interrupts are handled via a ring buffer.
6366  * Ring buffers are areas of GPU accessible memory that the GPU
6367  * writes interrupt vectors into and the host reads vectors out of.
6368  * There is a rptr (read pointer) that determines where the
6369  * host is currently reading, and a wptr (write pointer)
6370  * which determines where the GPU has written.  When the
6371  * pointers are equal, the ring is idle.  When the GPU
6372  * writes vectors to the ring buffer, it increments the
6373  * wptr.  When there is an interrupt, the host then starts
6374  * fetching commands and processing them until the pointers are
6375  * equal again at which point it updates the rptr.
6376  */
6377
6378 /**
6379  * cik_enable_interrupts - Enable the interrupt ring buffer
6380  *
6381  * @rdev: radeon_device pointer
6382  *
6383  * Enable the interrupt ring buffer (CIK).
6384  */
6385 static void cik_enable_interrupts(struct radeon_device *rdev)
6386 {
6387         u32 ih_cntl = RREG32(IH_CNTL);
6388         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6389
6390         ih_cntl |= ENABLE_INTR;
6391         ih_rb_cntl |= IH_RB_ENABLE;
6392         WREG32(IH_CNTL, ih_cntl);
6393         WREG32(IH_RB_CNTL, ih_rb_cntl);
6394         rdev->ih.enabled = true;
6395 }
6396
6397 /**
6398  * cik_disable_interrupts - Disable the interrupt ring buffer
6399  *
6400  * @rdev: radeon_device pointer
6401  *
6402  * Disable the interrupt ring buffer (CIK).
6403  */
6404 static void cik_disable_interrupts(struct radeon_device *rdev)
6405 {
6406         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6407         u32 ih_cntl = RREG32(IH_CNTL);
6408
6409         ih_rb_cntl &= ~IH_RB_ENABLE;
6410         ih_cntl &= ~ENABLE_INTR;
6411         WREG32(IH_RB_CNTL, ih_rb_cntl);
6412         WREG32(IH_CNTL, ih_cntl);
6413         /* set rptr, wptr to 0 */
6414         WREG32(IH_RB_RPTR, 0);
6415         WREG32(IH_RB_WPTR, 0);
6416         rdev->ih.enabled = false;
6417         rdev->ih.rptr = 0;
6418 }
6419
6420 /**
6421  * cik_disable_interrupt_state - Disable all interrupt sources
6422  *
6423  * @rdev: radeon_device pointer
6424  *
6425  * Clear all interrupt enable bits used by the driver (CIK).
6426  */
6427 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6428 {
6429         u32 tmp;
6430
6431         /* gfx ring */
6432         tmp = RREG32(CP_INT_CNTL_RING0) &
6433                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6434         WREG32(CP_INT_CNTL_RING0, tmp);
6435         /* sdma */
6436         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6437         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6438         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6439         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6440         /* compute queues */
6441         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6442         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6443         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6444         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6445         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6446         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6447         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6448         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6449         /* grbm */
6450         WREG32(GRBM_INT_CNTL, 0);
6451         /* vline/vblank, etc. */
6452         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6453         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6454         if (rdev->num_crtc >= 4) {
6455                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6456                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6457         }
6458         if (rdev->num_crtc >= 6) {
6459                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6460                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6461         }
6462
6463         /* dac hotplug */
6464         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6465
6466         /* digital hotplug */
6467         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6468         WREG32(DC_HPD1_INT_CONTROL, tmp);
6469         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6470         WREG32(DC_HPD2_INT_CONTROL, tmp);
6471         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6472         WREG32(DC_HPD3_INT_CONTROL, tmp);
6473         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6474         WREG32(DC_HPD4_INT_CONTROL, tmp);
6475         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6476         WREG32(DC_HPD5_INT_CONTROL, tmp);
6477         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6478         WREG32(DC_HPD6_INT_CONTROL, tmp);
6479
6480 }
6481
6482 /**
6483  * cik_irq_init - init and enable the interrupt ring
6484  *
6485  * @rdev: radeon_device pointer
6486  *
6487  * Allocate a ring buffer for the interrupt controller,
6488  * enable the RLC, disable interrupts, enable the IH
6489  * ring buffer and enable it (CIK).
6490  * Called at device load and reume.
6491  * Returns 0 for success, errors for failure.
6492  */
6493 static int cik_irq_init(struct radeon_device *rdev)
6494 {
6495         int ret = 0;
6496         int rb_bufsz;
6497         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6498
6499         /* allocate ring */
6500         ret = r600_ih_ring_alloc(rdev);
6501         if (ret)
6502                 return ret;
6503
6504         /* disable irqs */
6505         cik_disable_interrupts(rdev);
6506
6507         /* init rlc */
6508         ret = cik_rlc_resume(rdev);
6509         if (ret) {
6510                 r600_ih_ring_fini(rdev);
6511                 return ret;
6512         }
6513
6514         /* setup interrupt control */
6515         /* XXX this should actually be a bus address, not an MC address. same on older asics */
6516         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6517         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6518         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6519          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6520          */
6521         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6522         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6523         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6524         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6525
6526         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6527         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6528
6529         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6530                       IH_WPTR_OVERFLOW_CLEAR |
6531                       (rb_bufsz << 1));
6532
6533         if (rdev->wb.enabled)
6534                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6535
6536         /* set the writeback address whether it's enabled or not */
6537         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6538         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6539
6540         WREG32(IH_RB_CNTL, ih_rb_cntl);
6541
6542         /* set rptr, wptr to 0 */
6543         WREG32(IH_RB_RPTR, 0);
6544         WREG32(IH_RB_WPTR, 0);
6545
6546         /* Default settings for IH_CNTL (disabled at first) */
6547         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6548         /* RPTR_REARM only works if msi's are enabled */
6549         if (rdev->msi_enabled)
6550                 ih_cntl |= RPTR_REARM;
6551         WREG32(IH_CNTL, ih_cntl);
6552
6553         /* force the active interrupt state to all disabled */
6554         cik_disable_interrupt_state(rdev);
6555
6556         pci_set_master(rdev->pdev);
6557
6558         /* enable irqs */
6559         cik_enable_interrupts(rdev);
6560
6561         return ret;
6562 }
6563
6564 /**
6565  * cik_irq_set - enable/disable interrupt sources
6566  *
6567  * @rdev: radeon_device pointer
6568  *
6569  * Enable interrupt sources on the GPU (vblanks, hpd,
6570  * etc.) (CIK).
6571  * Returns 0 for success, errors for failure.
6572  */
6573 int cik_irq_set(struct radeon_device *rdev)
6574 {
6575         u32 cp_int_cntl;
6576         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6577         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6578         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6579         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6580         u32 grbm_int_cntl = 0;
6581         u32 dma_cntl, dma_cntl1;
6582         u32 thermal_int;
6583
6584         if (!rdev->irq.installed) {
6585                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6586                 return -EINVAL;
6587         }
6588         /* don't enable anything if the ih is disabled */
6589         if (!rdev->ih.enabled) {
6590                 cik_disable_interrupts(rdev);
6591                 /* force the active interrupt state to all disabled */
6592                 cik_disable_interrupt_state(rdev);
6593                 return 0;
6594         }
6595
6596         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6597                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6598         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6599
6600         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6601         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6602         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6603         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6604         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6605         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6606
6607         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6608         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6609
6610         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6611         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6612         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6613         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6614         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6615         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6616         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6617         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6618
6619         if (rdev->flags & RADEON_IS_IGP)
6620                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6621                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6622         else
6623                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6624                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6625
6626         /* enable CP interrupts on all rings */
6627         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6628                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6629                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6630         }
6631         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6632                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6633                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6634                 if (ring->me == 1) {
6635                         switch (ring->pipe) {
6636                         case 0:
6637                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6638                                 break;
6639                         case 1:
6640                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6641                                 break;
6642                         case 2:
6643                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6644                                 break;
6645                         case 3:
6646                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6647                                 break;
6648                         default:
6649                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6650                                 break;
6651                         }
6652                 } else if (ring->me == 2) {
6653                         switch (ring->pipe) {
6654                         case 0:
6655                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6656                                 break;
6657                         case 1:
6658                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6659                                 break;
6660                         case 2:
6661                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6662                                 break;
6663                         case 3:
6664                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6665                                 break;
6666                         default:
6667                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6668                                 break;
6669                         }
6670                 } else {
6671                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6672                 }
6673         }
6674         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6675                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6676                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6677                 if (ring->me == 1) {
6678                         switch (ring->pipe) {
6679                         case 0:
6680                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6681                                 break;
6682                         case 1:
6683                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6684                                 break;
6685                         case 2:
6686                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6687                                 break;
6688                         case 3:
6689                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6690                                 break;
6691                         default:
6692                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6693                                 break;
6694                         }
6695                 } else if (ring->me == 2) {
6696                         switch (ring->pipe) {
6697                         case 0:
6698                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6699                                 break;
6700                         case 1:
6701                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6702                                 break;
6703                         case 2:
6704                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6705                                 break;
6706                         case 3:
6707                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6708                                 break;
6709                         default:
6710                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6711                                 break;
6712                         }
6713                 } else {
6714                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6715                 }
6716         }
6717
6718         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6719                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6720                 dma_cntl |= TRAP_ENABLE;
6721         }
6722
6723         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6724                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6725                 dma_cntl1 |= TRAP_ENABLE;
6726         }
6727
6728         if (rdev->irq.crtc_vblank_int[0] ||
6729             atomic_read(&rdev->irq.pflip[0])) {
6730                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6731                 crtc1 |= VBLANK_INTERRUPT_MASK;
6732         }
6733         if (rdev->irq.crtc_vblank_int[1] ||
6734             atomic_read(&rdev->irq.pflip[1])) {
6735                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6736                 crtc2 |= VBLANK_INTERRUPT_MASK;
6737         }
6738         if (rdev->irq.crtc_vblank_int[2] ||
6739             atomic_read(&rdev->irq.pflip[2])) {
6740                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6741                 crtc3 |= VBLANK_INTERRUPT_MASK;
6742         }
6743         if (rdev->irq.crtc_vblank_int[3] ||
6744             atomic_read(&rdev->irq.pflip[3])) {
6745                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6746                 crtc4 |= VBLANK_INTERRUPT_MASK;
6747         }
6748         if (rdev->irq.crtc_vblank_int[4] ||
6749             atomic_read(&rdev->irq.pflip[4])) {
6750                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6751                 crtc5 |= VBLANK_INTERRUPT_MASK;
6752         }
6753         if (rdev->irq.crtc_vblank_int[5] ||
6754             atomic_read(&rdev->irq.pflip[5])) {
6755                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6756                 crtc6 |= VBLANK_INTERRUPT_MASK;
6757         }
6758         if (rdev->irq.hpd[0]) {
6759                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6760                 hpd1 |= DC_HPDx_INT_EN;
6761         }
6762         if (rdev->irq.hpd[1]) {
6763                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6764                 hpd2 |= DC_HPDx_INT_EN;
6765         }
6766         if (rdev->irq.hpd[2]) {
6767                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6768                 hpd3 |= DC_HPDx_INT_EN;
6769         }
6770         if (rdev->irq.hpd[3]) {
6771                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6772                 hpd4 |= DC_HPDx_INT_EN;
6773         }
6774         if (rdev->irq.hpd[4]) {
6775                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6776                 hpd5 |= DC_HPDx_INT_EN;
6777         }
6778         if (rdev->irq.hpd[5]) {
6779                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6780                 hpd6 |= DC_HPDx_INT_EN;
6781         }
6782
6783         if (rdev->irq.dpm_thermal) {
6784                 DRM_DEBUG("dpm thermal\n");
6785                 if (rdev->flags & RADEON_IS_IGP)
6786                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6787                 else
6788                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6789         }
6790
6791         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6792
6793         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6794         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6795
6796         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6797         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6798         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6799         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6800         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6801         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6802         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6803         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6804
6805         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6806
6807         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6808         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6809         if (rdev->num_crtc >= 4) {
6810                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6811                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6812         }
6813         if (rdev->num_crtc >= 6) {
6814                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6815                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6816         }
6817
6818         WREG32(DC_HPD1_INT_CONTROL, hpd1);
6819         WREG32(DC_HPD2_INT_CONTROL, hpd2);
6820         WREG32(DC_HPD3_INT_CONTROL, hpd3);
6821         WREG32(DC_HPD4_INT_CONTROL, hpd4);
6822         WREG32(DC_HPD5_INT_CONTROL, hpd5);
6823         WREG32(DC_HPD6_INT_CONTROL, hpd6);
6824
6825         if (rdev->flags & RADEON_IS_IGP)
6826                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6827         else
6828                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6829
6830         return 0;
6831 }
6832
6833 /**
6834  * cik_irq_ack - ack interrupt sources
6835  *
6836  * @rdev: radeon_device pointer
6837  *
6838  * Ack interrupt sources on the GPU (vblanks, hpd,
6839  * etc.) (CIK).  Certain interrupts sources are sw
6840  * generated and do not require an explicit ack.
6841  */
6842 static inline void cik_irq_ack(struct radeon_device *rdev)
6843 {
6844         u32 tmp;
6845
6846         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6847         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6848         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6849         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6850         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6851         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6852         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6853
6854         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6855                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6856         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6857                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6858         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6859                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6860         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6861                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6862
6863         if (rdev->num_crtc >= 4) {
6864                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6865                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6866                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6867                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6868                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6869                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6870                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6871                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6872         }
6873
6874         if (rdev->num_crtc >= 6) {
6875                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6876                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6877                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6878                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6879                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6880                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6881                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6882                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6883         }
6884
6885         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6886                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6887                 tmp |= DC_HPDx_INT_ACK;
6888                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6889         }
6890         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6891                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6892                 tmp |= DC_HPDx_INT_ACK;
6893                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6894         }
6895         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6896                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6897                 tmp |= DC_HPDx_INT_ACK;
6898                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6899         }
6900         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6901                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6902                 tmp |= DC_HPDx_INT_ACK;
6903                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6904         }
6905         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6906                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6907                 tmp |= DC_HPDx_INT_ACK;
6908                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6909         }
6910         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6911                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6912                 tmp |= DC_HPDx_INT_ACK;
6913                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6914         }
6915 }
6916
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Turn off the IH ring, wait 1 ms, ack whatever display interrupts
 * are still pending and then clear all interrupt source enables on
 * the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* stop the IH first so nothing new is delivered */
	cik_disable_interrupts(rdev);

	/* give in-flight interrupts time to land, then acknowledge them */
	mdelay(1);
	cik_irq_ack(rdev);

	/* finally switch every source off */
	cik_disable_interrupt_state(rdev);
}
6932
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts via cik_irq_disable() and then stop the
 * RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts go down before the RLC is stopped */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
6946
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Run the suspend path (interrupts off, RLC stopped) and then
 * free the IH ring buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw before the ring memory goes away */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6961
6962 /**
6963  * cik_get_ih_wptr - get the IH ring buffer wptr
6964  *
6965  * @rdev: radeon_device pointer
6966  *
6967  * Get the IH ring buffer wptr from either the register
6968  * or the writeback memory buffer (CIK).  Also check for
6969  * ring buffer overflow and deal with it.
6970  * Used by cik_irq_process().
6971  * Returns the value of the wptr.
6972  */
6973 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6974 {
6975         u32 wptr, tmp;
6976
6977         if (rdev->wb.enabled)
6978                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6979         else
6980                 wptr = RREG32(IH_RB_WPTR);
6981
6982         if (wptr & RB_OVERFLOW) {
6983                 /* When a ring buffer overflow happen start parsing interrupt
6984                  * from the last not overwritten vector (wptr + 16). Hopefully
6985                  * this should allow us to catchup.
6986                  */
6987                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6988                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6989                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6990                 tmp = RREG32(IH_RB_CNTL);
6991                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6992                 WREG32(IH_RB_CNTL, tmp);
6993         }
6994         return (wptr & rdev->ih.ptr_mask);
6995 }
6996
6997 /*        CIK IV Ring
6998  * Each IV ring entry is 128 bits:
6999  * [7:0]    - interrupt source id
7000  * [31:8]   - reserved
7001  * [59:32]  - interrupt source data
7002  * [63:60]  - reserved
7003  * [71:64]  - RINGID
7004  *            CP:
7005  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7006  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7007  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7008  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7009  *            PIPE_ID - ME0 0=3D
7010  *                    - ME1&2 compute dispatcher (4 pipes each)
7011  *            SDMA:
7012  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7013  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7014  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7015  * [79:72]  - VMID
7016  * [95:80]  - PASID
7017  * [127:96] - reserved
7018  */
7019 /**
7020  * cik_irq_process - interrupt handler
7021  *
7022  * @rdev: radeon_device pointer
7023  *
7024  * Interrupt handler (CIK).  Walk the IH ring,
7025  * ack interrupts and schedule work to handle
7026  * interrupt events.
7027  * Returns irq process return code.
7028  */
7029 int cik_irq_process(struct radeon_device *rdev)
7030 {
7031         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7032         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7033         u32 wptr;
7034         u32 rptr;
7035         u32 src_id, src_data, ring_id;
7036         u8 me_id, pipe_id, queue_id;
7037         u32 ring_index;
7038         bool queue_hotplug = false;
7039         bool queue_reset = false;
7040         u32 addr, status, mc_client;
7041         bool queue_thermal = false;
7042
7043         if (!rdev->ih.enabled || rdev->shutdown)
7044                 return IRQ_NONE;
7045
7046         wptr = cik_get_ih_wptr(rdev);
7047
7048 restart_ih:
7049         /* is somebody else already processing irqs? */
7050         if (atomic_xchg(&rdev->ih.lock, 1))
7051                 return IRQ_NONE;
7052
7053         rptr = rdev->ih.rptr;
7054         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7055
7056         /* Order reading of wptr vs. reading of IH ring data */
7057         rmb();
7058
7059         /* display interrupts */
7060         cik_irq_ack(rdev);
7061
7062         while (rptr != wptr) {
7063                 /* wptr/rptr are in bytes! */
7064                 ring_index = rptr / 4;
7065                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7066                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7067                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7068
7069                 switch (src_id) {
7070                 case 1: /* D1 vblank/vline */
7071                         switch (src_data) {
7072                         case 0: /* D1 vblank */
7073                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7074                                         if (rdev->irq.crtc_vblank_int[0]) {
7075                                                 drm_handle_vblank(rdev->ddev, 0);
7076                                                 rdev->pm.vblank_sync = true;
7077                                                 wake_up(&rdev->irq.vblank_queue);
7078                                         }
7079                                         if (atomic_read(&rdev->irq.pflip[0]))
7080                                                 radeon_crtc_handle_flip(rdev, 0);
7081                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7082                                         DRM_DEBUG("IH: D1 vblank\n");
7083                                 }
7084                                 break;
7085                         case 1: /* D1 vline */
7086                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7087                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7088                                         DRM_DEBUG("IH: D1 vline\n");
7089                                 }
7090                                 break;
7091                         default:
7092                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7093                                 break;
7094                         }
7095                         break;
7096                 case 2: /* D2 vblank/vline */
7097                         switch (src_data) {
7098                         case 0: /* D2 vblank */
7099                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7100                                         if (rdev->irq.crtc_vblank_int[1]) {
7101                                                 drm_handle_vblank(rdev->ddev, 1);
7102                                                 rdev->pm.vblank_sync = true;
7103                                                 wake_up(&rdev->irq.vblank_queue);
7104                                         }
7105                                         if (atomic_read(&rdev->irq.pflip[1]))
7106                                                 radeon_crtc_handle_flip(rdev, 1);
7107                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7108                                         DRM_DEBUG("IH: D2 vblank\n");
7109                                 }
7110                                 break;
7111                         case 1: /* D2 vline */
7112                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7113                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7114                                         DRM_DEBUG("IH: D2 vline\n");
7115                                 }
7116                                 break;
7117                         default:
7118                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7119                                 break;
7120                         }
7121                         break;
7122                 case 3: /* D3 vblank/vline */
7123                         switch (src_data) {
7124                         case 0: /* D3 vblank */
7125                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7126                                         if (rdev->irq.crtc_vblank_int[2]) {
7127                                                 drm_handle_vblank(rdev->ddev, 2);
7128                                                 rdev->pm.vblank_sync = true;
7129                                                 wake_up(&rdev->irq.vblank_queue);
7130                                         }
7131                                         if (atomic_read(&rdev->irq.pflip[2]))
7132                                                 radeon_crtc_handle_flip(rdev, 2);
7133                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7134                                         DRM_DEBUG("IH: D3 vblank\n");
7135                                 }
7136                                 break;
7137                         case 1: /* D3 vline */
7138                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7139                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7140                                         DRM_DEBUG("IH: D3 vline\n");
7141                                 }
7142                                 break;
7143                         default:
7144                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7145                                 break;
7146                         }
7147                         break;
7148                 case 4: /* D4 vblank/vline */
7149                         switch (src_data) {
7150                         case 0: /* D4 vblank */
7151                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7152                                         if (rdev->irq.crtc_vblank_int[3]) {
7153                                                 drm_handle_vblank(rdev->ddev, 3);
7154                                                 rdev->pm.vblank_sync = true;
7155                                                 wake_up(&rdev->irq.vblank_queue);
7156                                         }
7157                                         if (atomic_read(&rdev->irq.pflip[3]))
7158                                                 radeon_crtc_handle_flip(rdev, 3);
7159                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7160                                         DRM_DEBUG("IH: D4 vblank\n");
7161                                 }
7162                                 break;
7163                         case 1: /* D4 vline */
7164                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7165                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7166                                         DRM_DEBUG("IH: D4 vline\n");
7167                                 }
7168                                 break;
7169                         default:
7170                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7171                                 break;
7172                         }
7173                         break;
7174                 case 5: /* D5 vblank/vline */
7175                         switch (src_data) {
7176                         case 0: /* D5 vblank */
7177                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7178                                         if (rdev->irq.crtc_vblank_int[4]) {
7179                                                 drm_handle_vblank(rdev->ddev, 4);
7180                                                 rdev->pm.vblank_sync = true;
7181                                                 wake_up(&rdev->irq.vblank_queue);
7182                                         }
7183                                         if (atomic_read(&rdev->irq.pflip[4]))
7184                                                 radeon_crtc_handle_flip(rdev, 4);
7185                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7186                                         DRM_DEBUG("IH: D5 vblank\n");
7187                                 }
7188                                 break;
7189                         case 1: /* D5 vline */
7190                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7191                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7192                                         DRM_DEBUG("IH: D5 vline\n");
7193                                 }
7194                                 break;
7195                         default:
7196                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7197                                 break;
7198                         }
7199                         break;
7200                 case 6: /* D6 vblank/vline */
7201                         switch (src_data) {
7202                         case 0: /* D6 vblank */
7203                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7204                                         if (rdev->irq.crtc_vblank_int[5]) {
7205                                                 drm_handle_vblank(rdev->ddev, 5);
7206                                                 rdev->pm.vblank_sync = true;
7207                                                 wake_up(&rdev->irq.vblank_queue);
7208                                         }
7209                                         if (atomic_read(&rdev->irq.pflip[5]))
7210                                                 radeon_crtc_handle_flip(rdev, 5);
7211                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7212                                         DRM_DEBUG("IH: D6 vblank\n");
7213                                 }
7214                                 break;
7215                         case 1: /* D6 vline */
7216                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7217                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7218                                         DRM_DEBUG("IH: D6 vline\n");
7219                                 }
7220                                 break;
7221                         default:
7222                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7223                                 break;
7224                         }
7225                         break;
7226                 case 42: /* HPD hotplug */
7227                         switch (src_data) {
7228                         case 0:
7229                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7230                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7231                                         queue_hotplug = true;
7232                                         DRM_DEBUG("IH: HPD1\n");
7233                                 }
7234                                 break;
7235                         case 1:
7236                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7237                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7238                                         queue_hotplug = true;
7239                                         DRM_DEBUG("IH: HPD2\n");
7240                                 }
7241                                 break;
7242                         case 2:
7243                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7244                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7245                                         queue_hotplug = true;
7246                                         DRM_DEBUG("IH: HPD3\n");
7247                                 }
7248                                 break;
7249                         case 3:
7250                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7251                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7252                                         queue_hotplug = true;
7253                                         DRM_DEBUG("IH: HPD4\n");
7254                                 }
7255                                 break;
7256                         case 4:
7257                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7258                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7259                                         queue_hotplug = true;
7260                                         DRM_DEBUG("IH: HPD5\n");
7261                                 }
7262                                 break;
7263                         case 5:
7264                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7265                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7266                                         queue_hotplug = true;
7267                                         DRM_DEBUG("IH: HPD6\n");
7268                                 }
7269                                 break;
7270                         default:
7271                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7272                                 break;
7273                         }
7274                         break;
7275                 case 124: /* UVD */
7276                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7277                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7278                         break;
7279                 case 146:
7280                 case 147:
7281                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7282                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7283                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7284                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7285                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7286                                 addr);
7287                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7288                                 status);
7289                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7290                         /* reset addr and status */
7291                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7292                         break;
7293                 case 176: /* GFX RB CP_INT */
7294                 case 177: /* GFX IB CP_INT */
7295                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7296                         break;
7297                 case 181: /* CP EOP event */
7298                         DRM_DEBUG("IH: CP EOP\n");
7299                         /* XXX check the bitfield order! */
7300                         me_id = (ring_id & 0x60) >> 5;
7301                         pipe_id = (ring_id & 0x18) >> 3;
7302                         queue_id = (ring_id & 0x7) >> 0;
7303                         switch (me_id) {
7304                         case 0:
7305                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7306                                 break;
7307                         case 1:
7308                         case 2:
7309                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7310                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7311                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7312                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7313                                 break;
7314                         }
7315                         break;
7316                 case 184: /* CP Privileged reg access */
7317                         DRM_ERROR("Illegal register access in command stream\n");
7318                         /* XXX check the bitfield order! */
7319                         me_id = (ring_id & 0x60) >> 5;
7320                         pipe_id = (ring_id & 0x18) >> 3;
7321                         queue_id = (ring_id & 0x7) >> 0;
7322                         switch (me_id) {
7323                         case 0:
7324                                 /* This results in a full GPU reset, but all we need to do is soft
7325                                  * reset the CP for gfx
7326                                  */
7327                                 queue_reset = true;
7328                                 break;
7329                         case 1:
7330                                 /* XXX compute */
7331                                 queue_reset = true;
7332                                 break;
7333                         case 2:
7334                                 /* XXX compute */
7335                                 queue_reset = true;
7336                                 break;
7337                         }
7338                         break;
7339                 case 185: /* CP Privileged inst */
7340                         DRM_ERROR("Illegal instruction in command stream\n");
7341                         /* XXX check the bitfield order! */
7342                         me_id = (ring_id & 0x60) >> 5;
7343                         pipe_id = (ring_id & 0x18) >> 3;
7344                         queue_id = (ring_id & 0x7) >> 0;
7345                         switch (me_id) {
7346                         case 0:
7347                                 /* This results in a full GPU reset, but all we need to do is soft
7348                                  * reset the CP for gfx
7349                                  */
7350                                 queue_reset = true;
7351                                 break;
7352                         case 1:
7353                                 /* XXX compute */
7354                                 queue_reset = true;
7355                                 break;
7356                         case 2:
7357                                 /* XXX compute */
7358                                 queue_reset = true;
7359                                 break;
7360                         }
7361                         break;
7362                 case 224: /* SDMA trap event */
7363                         /* XXX check the bitfield order! */
7364                         me_id = (ring_id & 0x3) >> 0;
7365                         queue_id = (ring_id & 0xc) >> 2;
7366                         DRM_DEBUG("IH: SDMA trap\n");
7367                         switch (me_id) {
7368                         case 0:
7369                                 switch (queue_id) {
7370                                 case 0:
7371                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7372                                         break;
7373                                 case 1:
7374                                         /* XXX compute */
7375                                         break;
7376                                 case 2:
7377                                         /* XXX compute */
7378                                         break;
7379                                 }
7380                                 break;
7381                         case 1:
7382                                 switch (queue_id) {
7383                                 case 0:
7384                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7385                                         break;
7386                                 case 1:
7387                                         /* XXX compute */
7388                                         break;
7389                                 case 2:
7390                                         /* XXX compute */
7391                                         break;
7392                                 }
7393                                 break;
7394                         }
7395                         break;
7396                 case 230: /* thermal low to high */
7397                         DRM_DEBUG("IH: thermal low to high\n");
7398                         rdev->pm.dpm.thermal.high_to_low = false;
7399                         queue_thermal = true;
7400                         break;
7401                 case 231: /* thermal high to low */
7402                         DRM_DEBUG("IH: thermal high to low\n");
7403                         rdev->pm.dpm.thermal.high_to_low = true;
7404                         queue_thermal = true;
7405                         break;
7406                 case 233: /* GUI IDLE */
7407                         DRM_DEBUG("IH: GUI idle\n");
7408                         break;
7409                 case 241: /* SDMA Privileged inst */
7410                 case 247: /* SDMA Privileged inst */
7411                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7412                         /* XXX check the bitfield order! */
7413                         me_id = (ring_id & 0x3) >> 0;
7414                         queue_id = (ring_id & 0xc) >> 2;
7415                         switch (me_id) {
7416                         case 0:
7417                                 switch (queue_id) {
7418                                 case 0:
7419                                         queue_reset = true;
7420                                         break;
7421                                 case 1:
7422                                         /* XXX compute */
7423                                         queue_reset = true;
7424                                         break;
7425                                 case 2:
7426                                         /* XXX compute */
7427                                         queue_reset = true;
7428                                         break;
7429                                 }
7430                                 break;
7431                         case 1:
7432                                 switch (queue_id) {
7433                                 case 0:
7434                                         queue_reset = true;
7435                                         break;
7436                                 case 1:
7437                                         /* XXX compute */
7438                                         queue_reset = true;
7439                                         break;
7440                                 case 2:
7441                                         /* XXX compute */
7442                                         queue_reset = true;
7443                                         break;
7444                                 }
7445                                 break;
7446                         }
7447                         break;
7448                 default:
7449                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7450                         break;
7451                 }
7452
7453                 /* wptr/rptr are in bytes! */
7454                 rptr += 16;
7455                 rptr &= rdev->ih.ptr_mask;
7456         }
7457         if (queue_hotplug)
7458                 schedule_work(&rdev->hotplug_work);
7459         if (queue_reset)
7460                 schedule_work(&rdev->reset_work);
7461         if (queue_thermal)
7462                 schedule_work(&rdev->pm.dpm.thermal.work);
7463         rdev->ih.rptr = rptr;
7464         WREG32(IH_RB_RPTR, rdev->ih.rptr);
7465         atomic_set(&rdev->ih.lock, 0);
7466
7467         /* make sure wptr hasn't changed while processing */
7468         wptr = cik_get_ih_wptr(rdev);
7469         if (wptr != rptr)
7470                 goto restart_ih;
7471
7472         return IRQ_HANDLED;
7473 }
7474
7475 /*
7476  * startup/shutdown callbacks
7477  */
7478 /**
7479  * cik_startup - program the asic to a functional state
7480  *
7481  * @rdev: radeon_device pointer
7482  *
7483  * Programs the asic to a functional state (CIK).
7484  * Called by cik_init() and cik_resume().
7485  * Returns 0 for success, error for failure.
7486  */
7487 static int cik_startup(struct radeon_device *rdev)
7488 {
7489         struct radeon_ring *ring;
7490         int r;
7491
7492         /* enable pcie gen2/3 link */
7493         cik_pcie_gen3_enable(rdev);
7494         /* enable aspm */
7495         cik_program_aspm(rdev);
7496
7497         /* scratch needs to be initialized before MC */
7498         r = r600_vram_scratch_init(rdev);
7499         if (r)
7500                 return r;
7501
7502         cik_mc_program(rdev);
7503
7504         if (rdev->flags & RADEON_IS_IGP) {
7505                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7506                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7507                         r = cik_init_microcode(rdev);
7508                         if (r) {
7509                                 DRM_ERROR("Failed to load firmware!\n");
7510                                 return r;
7511                         }
7512                 }
7513         } else {
7514                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7515                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7516                     !rdev->mc_fw) {
7517                         r = cik_init_microcode(rdev);
7518                         if (r) {
7519                                 DRM_ERROR("Failed to load firmware!\n");
7520                                 return r;
7521                         }
7522                 }
7523
7524                 r = ci_mc_load_microcode(rdev);
7525                 if (r) {
7526                         DRM_ERROR("Failed to load MC firmware!\n");
7527                         return r;
7528                 }
7529         }
7530
7531         r = cik_pcie_gart_enable(rdev);
7532         if (r)
7533                 return r;
7534         cik_gpu_init(rdev);
7535
7536         /* allocate rlc buffers */
7537         if (rdev->flags & RADEON_IS_IGP) {
7538                 if (rdev->family == CHIP_KAVERI) {
7539                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7540                         rdev->rlc.reg_list_size =
7541                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7542                 } else {
7543                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7544                         rdev->rlc.reg_list_size =
7545                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7546                 }
7547         }
7548         rdev->rlc.cs_data = ci_cs_data;
7549         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7550         r = sumo_rlc_init(rdev);
7551         if (r) {
7552                 DRM_ERROR("Failed to init rlc BOs!\n");
7553                 return r;
7554         }
7555
7556         /* allocate wb buffer */
7557         r = radeon_wb_init(rdev);
7558         if (r)
7559                 return r;
7560
7561         /* allocate mec buffers */
7562         r = cik_mec_init(rdev);
7563         if (r) {
7564                 DRM_ERROR("Failed to init MEC BOs!\n");
7565                 return r;
7566         }
7567
7568         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7569         if (r) {
7570                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7571                 return r;
7572         }
7573
7574         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7575         if (r) {
7576                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7577                 return r;
7578         }
7579
7580         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7581         if (r) {
7582                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7583                 return r;
7584         }
7585
7586         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7587         if (r) {
7588                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7589                 return r;
7590         }
7591
7592         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7593         if (r) {
7594                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7595                 return r;
7596         }
7597
7598         r = radeon_uvd_resume(rdev);
7599         if (!r) {
7600                 r = uvd_v4_2_resume(rdev);
7601                 if (!r) {
7602                         r = radeon_fence_driver_start_ring(rdev,
7603                                                            R600_RING_TYPE_UVD_INDEX);
7604                         if (r)
7605                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7606                 }
7607         }
7608         if (r)
7609                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7610
7611         /* Enable IRQ */
7612         if (!rdev->irq.installed) {
7613                 r = radeon_irq_kms_init(rdev);
7614                 if (r)
7615                         return r;
7616         }
7617
7618         r = cik_irq_init(rdev);
7619         if (r) {
7620                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7621                 radeon_irq_kms_fini(rdev);
7622                 return r;
7623         }
7624         cik_irq_set(rdev);
7625
7626         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7627         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7628                              CP_RB0_RPTR, CP_RB0_WPTR,
7629                              PACKET3(PACKET3_NOP, 0x3FFF));
7630         if (r)
7631                 return r;
7632
7633         /* set up the compute queues */
7634         /* type-2 packets are deprecated on MEC, use type-3 instead */
7635         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7636         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7637                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7638                              PACKET3(PACKET3_NOP, 0x3FFF));
7639         if (r)
7640                 return r;
7641         ring->me = 1; /* first MEC */
7642         ring->pipe = 0; /* first pipe */
7643         ring->queue = 0; /* first queue */
7644         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7645
7646         /* type-2 packets are deprecated on MEC, use type-3 instead */
7647         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7648         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7649                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7650                              PACKET3(PACKET3_NOP, 0x3FFF));
7651         if (r)
7652                 return r;
7653         /* dGPU only have 1 MEC */
7654         ring->me = 1; /* first MEC */
7655         ring->pipe = 0; /* first pipe */
7656         ring->queue = 1; /* second queue */
7657         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7658
7659         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7660         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7661                              SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7662                              SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7663                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7664         if (r)
7665                 return r;
7666
7667         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7668         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7669                              SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7670                              SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7671                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7672         if (r)
7673                 return r;
7674
7675         r = cik_cp_resume(rdev);
7676         if (r)
7677                 return r;
7678
7679         r = cik_sdma_resume(rdev);
7680         if (r)
7681                 return r;
7682
7683         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7684         if (ring->ring_size) {
7685                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7686                                      UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7687                                      RADEON_CP_PACKET2);
7688                 if (!r)
7689                         r = uvd_v1_0_init(rdev);
7690                 if (r)
7691                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7692         }
7693
7694         r = radeon_ib_pool_init(rdev);
7695         if (r) {
7696                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7697                 return r;
7698         }
7699
7700         r = radeon_vm_manager_init(rdev);
7701         if (r) {
7702                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7703                 return r;
7704         }
7705
7706         r = dce6_audio_init(rdev);
7707         if (r)
7708                 return r;
7709
7710         return 0;
7711 }
7712
7713 /**
7714  * cik_resume - resume the asic to a functional state
7715  *
7716  * @rdev: radeon_device pointer
7717  *
7718  * Programs the asic to a functional state (CIK).
7719  * Called at resume.
7720  * Returns 0 for success, error for failure.
7721  */
7722 int cik_resume(struct radeon_device *rdev)
7723 {
7724         int r;
7725
7726         /* post card */
7727         atom_asic_init(rdev->mode_info.atom_context);
7728
7729         /* init golden registers */
7730         cik_init_golden_registers(rdev);
7731
7732         rdev->accel_working = true;
7733         r = cik_startup(rdev);
7734         if (r) {
7735                 DRM_ERROR("cik startup failed on resume\n");
7736                 rdev->accel_working = false;
7737                 return r;
7738         }
7739
7740         return r;
7741
7742 }
7743
7744 /**
7745  * cik_suspend - suspend the asic
7746  *
7747  * @rdev: radeon_device pointer
7748  *
7749  * Bring the chip into a state suitable for suspend (CIK).
7750  * Called at suspend.
7751  * Returns 0 for success.
7752  */
7753 int cik_suspend(struct radeon_device *rdev)
7754 {
7755         dce6_audio_fini(rdev);
7756         radeon_vm_manager_fini(rdev);
7757         cik_cp_enable(rdev, false);
7758         cik_sdma_enable(rdev, false);
7759         uvd_v1_0_fini(rdev);
7760         radeon_uvd_suspend(rdev);
7761         cik_fini_pg(rdev);
7762         cik_fini_cg(rdev);
7763         cik_irq_suspend(rdev);
7764         radeon_wb_disable(rdev);
7765         cik_pcie_gart_disable(rdev);
7766         return 0;
7767 }
7768
7769 /* Plan is to move initialization in that function and use
7770  * helper function so that radeon_device_init pretty much
7771  * do nothing more than calling asic specific function. This
7772  * should also allow to remove a bunch of callback function
7773  * like vram_info.
7774  */
7775 /**
7776  * cik_init - asic specific driver and hw init
7777  *
7778  * @rdev: radeon_device pointer
7779  *
7780  * Setup asic specific driver variables and program the hw
7781  * to a functional state (CIK).
7782  * Called at driver startup.
7783  * Returns 0 for success, errors for failure.
7784  */
7785 int cik_init(struct radeon_device *rdev)
7786 {
7787         struct radeon_ring *ring;
7788         int r;
7789
7790         /* Read BIOS */
7791         if (!radeon_get_bios(rdev)) {
7792                 if (ASIC_IS_AVIVO(rdev))
7793                         return -EINVAL;
7794         }
7795         /* Must be an ATOMBIOS */
7796         if (!rdev->is_atom_bios) {
7797                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7798                 return -EINVAL;
7799         }
7800         r = radeon_atombios_init(rdev);
7801         if (r)
7802                 return r;
7803
7804         /* Post card if necessary */
7805         if (!radeon_card_posted(rdev)) {
7806                 if (!rdev->bios) {
7807                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7808                         return -EINVAL;
7809                 }
7810                 DRM_INFO("GPU not posted. posting now...\n");
7811                 atom_asic_init(rdev->mode_info.atom_context);
7812         }
7813         /* init golden registers */
7814         cik_init_golden_registers(rdev);
7815         /* Initialize scratch registers */
7816         cik_scratch_init(rdev);
7817         /* Initialize surface registers */
7818         radeon_surface_init(rdev);
7819         /* Initialize clocks */
7820         radeon_get_clock_info(rdev->ddev);
7821
7822         /* Fence driver */
7823         r = radeon_fence_driver_init(rdev);
7824         if (r)
7825                 return r;
7826
7827         /* initialize memory controller */
7828         r = cik_mc_init(rdev);
7829         if (r)
7830                 return r;
7831         /* Memory manager */
7832         r = radeon_bo_init(rdev);
7833         if (r)
7834                 return r;
7835
7836         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7837         ring->ring_obj = NULL;
7838         r600_ring_init(rdev, ring, 1024 * 1024);
7839
7840         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7841         ring->ring_obj = NULL;
7842         r600_ring_init(rdev, ring, 1024 * 1024);
7843         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
7844         if (r)
7845                 return r;
7846
7847         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7848         ring->ring_obj = NULL;
7849         r600_ring_init(rdev, ring, 1024 * 1024);
7850         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
7851         if (r)
7852                 return r;
7853
7854         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7855         ring->ring_obj = NULL;
7856         r600_ring_init(rdev, ring, 256 * 1024);
7857
7858         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7859         ring->ring_obj = NULL;
7860         r600_ring_init(rdev, ring, 256 * 1024);
7861
7862         r = radeon_uvd_init(rdev);
7863         if (!r) {
7864                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7865                 ring->ring_obj = NULL;
7866                 r600_ring_init(rdev, ring, 4096);
7867         }
7868
7869         rdev->ih.ring_obj = NULL;
7870         r600_ih_ring_init(rdev, 64 * 1024);
7871
7872         r = r600_pcie_gart_init(rdev);
7873         if (r)
7874                 return r;
7875
7876         rdev->accel_working = true;
7877         r = cik_startup(rdev);
7878         if (r) {
7879                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7880                 cik_cp_fini(rdev);
7881                 cik_sdma_fini(rdev);
7882                 cik_irq_fini(rdev);
7883                 sumo_rlc_fini(rdev);
7884                 cik_mec_fini(rdev);
7885                 radeon_wb_fini(rdev);
7886                 radeon_ib_pool_fini(rdev);
7887                 radeon_vm_manager_fini(rdev);
7888                 radeon_irq_kms_fini(rdev);
7889                 cik_pcie_gart_fini(rdev);
7890                 rdev->accel_working = false;
7891         }
7892
7893         /* Don't start up if the MC ucode is missing.
7894          * The default clocks and voltages before the MC ucode
7895          * is loaded are not suffient for advanced operations.
7896          */
7897         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7898                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7899                 return -EINVAL;
7900         }
7901
7902         return 0;
7903 }
7904
7905 /**
7906  * cik_fini - asic specific driver and hw fini
7907  *
7908  * @rdev: radeon_device pointer
7909  *
7910  * Tear down the asic specific driver variables and program the hw
7911  * to an idle state (CIK).
7912  * Called at driver unload.
7913  */
7914 void cik_fini(struct radeon_device *rdev)
7915 {
7916         cik_cp_fini(rdev);
7917         cik_sdma_fini(rdev);
7918         cik_fini_pg(rdev);
7919         cik_fini_cg(rdev);
7920         cik_irq_fini(rdev);
7921         sumo_rlc_fini(rdev);
7922         cik_mec_fini(rdev);
7923         radeon_wb_fini(rdev);
7924         radeon_vm_manager_fini(rdev);
7925         radeon_ib_pool_fini(rdev);
7926         radeon_irq_kms_fini(rdev);
7927         uvd_v1_0_fini(rdev);
7928         radeon_uvd_fini(rdev);
7929         cik_pcie_gart_fini(rdev);
7930         r600_vram_scratch_fini(rdev);
7931         radeon_gem_fini(rdev);
7932         radeon_fence_driver_fini(rdev);
7933         radeon_bo_fini(rdev);
7934         radeon_atombios_fini(rdev);
7935         kfree(rdev->bios);
7936         rdev->bios = NULL;
7937 }
7938
7939 void dce8_program_fmt(struct drm_encoder *encoder)
7940 {
7941         struct drm_device *dev = encoder->dev;
7942         struct radeon_device *rdev = dev->dev_private;
7943         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
7944         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
7945         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
7946         int bpc = 0;
7947         u32 tmp = 0;
7948         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
7949
7950         if (connector) {
7951                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
7952                 bpc = radeon_get_monitor_bpc(connector);
7953                 dither = radeon_connector->dither;
7954         }
7955
7956         /* LVDS/eDP FMT is set up by atom */
7957         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
7958                 return;
7959
7960         /* not needed for analog */
7961         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
7962             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
7963                 return;
7964
7965         if (bpc == 0)
7966                 return;
7967
7968         switch (bpc) {
7969         case 6:
7970                 if (dither == RADEON_FMT_DITHER_ENABLE)
7971                         /* XXX sort out optimal dither settings */
7972                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7973                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
7974                 else
7975                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
7976                 break;
7977         case 8:
7978                 if (dither == RADEON_FMT_DITHER_ENABLE)
7979                         /* XXX sort out optimal dither settings */
7980                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7981                                 FMT_RGB_RANDOM_ENABLE |
7982                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
7983                 else
7984                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
7985                 break;
7986         case 10:
7987                 if (dither == RADEON_FMT_DITHER_ENABLE)
7988                         /* XXX sort out optimal dither settings */
7989                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7990                                 FMT_RGB_RANDOM_ENABLE |
7991                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
7992                 else
7993                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
7994                 break;
7995         default:
7996                 /* not needed */
7997                 break;
7998         }
7999
8000         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8001 }
8002
8003 /* display watermark setup */
8004 /**
8005  * dce8_line_buffer_adjust - Set up the line buffer
8006  *
8007  * @rdev: radeon_device pointer
8008  * @radeon_crtc: the selected display controller
8009  * @mode: the current display mode on the selected display
8010  * controller
8011  *
8012  * Setup up the line buffer allocation for
8013  * the selected display controller (CIK).
8014  * Returns the line buffer size in pixels.
8015  */
8016 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8017                                    struct radeon_crtc *radeon_crtc,
8018                                    struct drm_display_mode *mode)
8019 {
8020         u32 tmp, buffer_alloc, i;
8021         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8022         /*
8023          * Line Buffer Setup
8024          * There are 6 line buffers, one for each display controllers.
8025          * There are 3 partitions per LB. Select the number of partitions
8026          * to enable based on the display width.  For display widths larger
8027          * than 4096, you need use to use 2 display controllers and combine
8028          * them using the stereo blender.
8029          */
8030         if (radeon_crtc->base.enabled && mode) {
8031                 if (mode->crtc_hdisplay < 1920) {
8032                         tmp = 1;
8033                         buffer_alloc = 2;
8034                 } else if (mode->crtc_hdisplay < 2560) {
8035                         tmp = 2;
8036                         buffer_alloc = 2;
8037                 } else if (mode->crtc_hdisplay < 4096) {
8038                         tmp = 0;
8039                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8040                 } else {
8041                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8042                         tmp = 0;
8043                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8044                 }
8045         } else {
8046                 tmp = 1;
8047                 buffer_alloc = 0;
8048         }
8049
8050         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8051                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8052
8053         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8054                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8055         for (i = 0; i < rdev->usec_timeout; i++) {
8056                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8057                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8058                         break;
8059                 udelay(1);
8060         }
8061
8062         if (radeon_crtc->base.enabled && mode) {
8063                 switch (tmp) {
8064                 case 0:
8065                 default:
8066                         return 4096 * 2;
8067                 case 1:
8068                         return 1920 * 2;
8069                 case 2:
8070                         return 2560 * 2;
8071                 }
8072         }
8073
8074         /* controller not enabled, so no lb used */
8075         return 0;
8076 }
8077
8078 /**
8079  * cik_get_number_of_dram_channels - get the number of dram channels
8080  *
8081  * @rdev: radeon_device pointer
8082  *
8083  * Look up the number of video ram channels (CIK).
8084  * Used for display watermark bandwidth calculations
8085  * Returns the number of dram channels
8086  */
8087 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8088 {
8089         u32 tmp = RREG32(MC_SHARED_CHMAP);
8090
8091         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8092         case 0:
8093         default:
8094                 return 1;
8095         case 1:
8096                 return 2;
8097         case 2:
8098                 return 4;
8099         case 3:
8100                 return 8;
8101         case 4:
8102                 return 3;
8103         case 5:
8104                 return 6;
8105         case 6:
8106                 return 10;
8107         case 7:
8108                 return 12;
8109         case 8:
8110                 return 16;
8111         }
8112 }
8113
8114 struct dce8_wm_params {
8115         u32 dram_channels; /* number of dram channels */
8116         u32 yclk;          /* bandwidth per dram data pin in kHz */
8117         u32 sclk;          /* engine clock in kHz */
8118         u32 disp_clk;      /* display clock in kHz */
8119         u32 src_width;     /* viewport width */
8120         u32 active_time;   /* active display time in ns */
8121         u32 blank_time;    /* blank time in ns */
8122         bool interlaced;    /* mode is interlaced */
8123         fixed20_12 vsc;    /* vertical scale ratio */
8124         u32 num_heads;     /* number of active crtcs */
8125         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8126         u32 lb_size;       /* line buffer allocated to pipe */
8127         u32 vtaps;         /* vertical scaler taps */
8128 };
8129
8130 /**
8131  * dce8_dram_bandwidth - get the dram bandwidth
8132  *
8133  * @wm: watermark calculation data
8134  *
8135  * Calculate the raw dram bandwidth (CIK).
8136  * Used for display watermark bandwidth calculations
8137  * Returns the dram bandwidth in MBytes/s
8138  */
8139 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8140 {
8141         /* Calculate raw DRAM Bandwidth */
8142         fixed20_12 dram_efficiency; /* 0.7 */
8143         fixed20_12 yclk, dram_channels, bandwidth;
8144         fixed20_12 a;
8145
8146         a.full = dfixed_const(1000);
8147         yclk.full = dfixed_const(wm->yclk);
8148         yclk.full = dfixed_div(yclk, a);
8149         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8150         a.full = dfixed_const(10);
8151         dram_efficiency.full = dfixed_const(7);
8152         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8153         bandwidth.full = dfixed_mul(dram_channels, yclk);
8154         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8155
8156         return dfixed_trunc(bandwidth);
8157 }
8158
8159 /**
8160  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8161  *
8162  * @wm: watermark calculation data
8163  *
8164  * Calculate the dram bandwidth used for display (CIK).
8165  * Used for display watermark bandwidth calculations
8166  * Returns the dram bandwidth for display in MBytes/s
8167  */
8168 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8169 {
8170         /* Calculate DRAM Bandwidth and the part allocated to display. */
8171         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8172         fixed20_12 yclk, dram_channels, bandwidth;
8173         fixed20_12 a;
8174
8175         a.full = dfixed_const(1000);
8176         yclk.full = dfixed_const(wm->yclk);
8177         yclk.full = dfixed_div(yclk, a);
8178         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8179         a.full = dfixed_const(10);
8180         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8181         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8182         bandwidth.full = dfixed_mul(dram_channels, yclk);
8183         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8184
8185         return dfixed_trunc(bandwidth);
8186 }
8187
8188 /**
8189  * dce8_data_return_bandwidth - get the data return bandwidth
8190  *
8191  * @wm: watermark calculation data
8192  *
8193  * Calculate the data return bandwidth used for display (CIK).
8194  * Used for display watermark bandwidth calculations
8195  * Returns the data return bandwidth in MBytes/s
8196  */
8197 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8198 {
8199         /* Calculate the display Data return Bandwidth */
8200         fixed20_12 return_efficiency; /* 0.8 */
8201         fixed20_12 sclk, bandwidth;
8202         fixed20_12 a;
8203
8204         a.full = dfixed_const(1000);
8205         sclk.full = dfixed_const(wm->sclk);
8206         sclk.full = dfixed_div(sclk, a);
8207         a.full = dfixed_const(10);
8208         return_efficiency.full = dfixed_const(8);
8209         return_efficiency.full = dfixed_div(return_efficiency, a);
8210         a.full = dfixed_const(32);
8211         bandwidth.full = dfixed_mul(a, sclk);
8212         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8213
8214         return dfixed_trunc(bandwidth);
8215 }
8216
8217 /**
8218  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8219  *
8220  * @wm: watermark calculation data
8221  *
8222  * Calculate the dmif bandwidth used for display (CIK).
8223  * Used for display watermark bandwidth calculations
8224  * Returns the dmif bandwidth in MBytes/s
8225  */
8226 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8227 {
8228         /* Calculate the DMIF Request Bandwidth */
8229         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8230         fixed20_12 disp_clk, bandwidth;
8231         fixed20_12 a, b;
8232
8233         a.full = dfixed_const(1000);
8234         disp_clk.full = dfixed_const(wm->disp_clk);
8235         disp_clk.full = dfixed_div(disp_clk, a);
8236         a.full = dfixed_const(32);
8237         b.full = dfixed_mul(a, disp_clk);
8238
8239         a.full = dfixed_const(10);
8240         disp_clk_request_efficiency.full = dfixed_const(8);
8241         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8242
8243         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8244
8245         return dfixed_trunc(bandwidth);
8246 }
8247
8248 /**
8249  * dce8_available_bandwidth - get the min available bandwidth
8250  *
8251  * @wm: watermark calculation data
8252  *
8253  * Calculate the min available bandwidth used for display (CIK).
8254  * Used for display watermark bandwidth calculations
8255  * Returns the min available bandwidth in MBytes/s
8256  */
8257 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8258 {
8259         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8260         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8261         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8262         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8263
8264         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8265 }
8266
8267 /**
8268  * dce8_average_bandwidth - get the average available bandwidth
8269  *
8270  * @wm: watermark calculation data
8271  *
8272  * Calculate the average available bandwidth used for display (CIK).
8273  * Used for display watermark bandwidth calculations
8274  * Returns the average available bandwidth in MBytes/s
8275  */
8276 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8277 {
8278         /* Calculate the display mode Average Bandwidth
8279          * DisplayMode should contain the source and destination dimensions,
8280          * timing, etc.
8281          */
8282         fixed20_12 bpp;
8283         fixed20_12 line_time;
8284         fixed20_12 src_width;
8285         fixed20_12 bandwidth;
8286         fixed20_12 a;
8287
8288         a.full = dfixed_const(1000);
8289         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8290         line_time.full = dfixed_div(line_time, a);
8291         bpp.full = dfixed_const(wm->bytes_per_pixel);
8292         src_width.full = dfixed_const(wm->src_width);
8293         bandwidth.full = dfixed_mul(src_width, bpp);
8294         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8295         bandwidth.full = dfixed_div(bandwidth, line_time);
8296
8297         return dfixed_trunc(bandwidth);
8298 }
8299
8300 /**
8301  * dce8_latency_watermark - get the latency watermark
8302  *
8303  * @wm: watermark calculation data
8304  *
8305  * Calculate the latency watermark (CIK).
8306  * Used for display watermark bandwidth calculations
8307  * Returns the latency watermark in ns
8308  */
8309 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8310 {
8311         /* First calculate the latency in ns */
8312         u32 mc_latency = 2000; /* 2000 ns. */
8313         u32 available_bandwidth = dce8_available_bandwidth(wm);
8314         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8315         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8316         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8317         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8318                 (wm->num_heads * cursor_line_pair_return_time);
8319         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8320         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8321         u32 tmp, dmif_size = 12288;
8322         fixed20_12 a, b, c;
8323
8324         if (wm->num_heads == 0)
8325                 return 0;
8326
8327         a.full = dfixed_const(2);
8328         b.full = dfixed_const(1);
8329         if ((wm->vsc.full > a.full) ||
8330             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8331             (wm->vtaps >= 5) ||
8332             ((wm->vsc.full >= a.full) && wm->interlaced))
8333                 max_src_lines_per_dst_line = 4;
8334         else
8335                 max_src_lines_per_dst_line = 2;
8336
8337         a.full = dfixed_const(available_bandwidth);
8338         b.full = dfixed_const(wm->num_heads);
8339         a.full = dfixed_div(a, b);
8340
8341         b.full = dfixed_const(mc_latency + 512);
8342         c.full = dfixed_const(wm->disp_clk);
8343         b.full = dfixed_div(b, c);
8344
8345         c.full = dfixed_const(dmif_size);
8346         b.full = dfixed_div(c, b);
8347
8348         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8349
8350         b.full = dfixed_const(1000);
8351         c.full = dfixed_const(wm->disp_clk);
8352         b.full = dfixed_div(c, b);
8353         c.full = dfixed_const(wm->bytes_per_pixel);
8354         b.full = dfixed_mul(b, c);
8355
8356         lb_fill_bw = min(tmp, dfixed_trunc(b));
8357
8358         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8359         b.full = dfixed_const(1000);
8360         c.full = dfixed_const(lb_fill_bw);
8361         b.full = dfixed_div(c, b);
8362         a.full = dfixed_div(a, b);
8363         line_fill_time = dfixed_trunc(a);
8364
8365         if (line_fill_time < wm->active_time)
8366                 return latency;
8367         else
8368                 return latency + (line_fill_time - wm->active_time);
8369
8370 }
8371
8372 /**
8373  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8374  * average and available dram bandwidth
8375  *
8376  * @wm: watermark calculation data
8377  *
8378  * Check if the display average bandwidth fits in the display
8379  * dram bandwidth (CIK).
8380  * Used for display watermark bandwidth calculations
8381  * Returns true if the display fits, false if not.
8382  */
8383 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8384 {
8385         if (dce8_average_bandwidth(wm) <=
8386             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8387                 return true;
8388         else
8389                 return false;
8390 }
8391
8392 /**
8393  * dce8_average_bandwidth_vs_available_bandwidth - check
8394  * average and available bandwidth
8395  *
8396  * @wm: watermark calculation data
8397  *
8398  * Check if the display average bandwidth fits in the display
8399  * available bandwidth (CIK).
8400  * Used for display watermark bandwidth calculations
8401  * Returns true if the display fits, false if not.
8402  */
8403 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8404 {
8405         if (dce8_average_bandwidth(wm) <=
8406             (dce8_available_bandwidth(wm) / wm->num_heads))
8407                 return true;
8408         else
8409                 return false;
8410 }
8411
8412 /**
8413  * dce8_check_latency_hiding - check latency hiding
8414  *
8415  * @wm: watermark calculation data
8416  *
8417  * Check latency hiding (CIK).
8418  * Used for display watermark bandwidth calculations
8419  * Returns true if the display fits, false if not.
8420  */
8421 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8422 {
8423         u32 lb_partitions = wm->lb_size / wm->src_width;
8424         u32 line_time = wm->active_time + wm->blank_time;
8425         u32 latency_tolerant_lines;
8426         u32 latency_hiding;
8427         fixed20_12 a;
8428
8429         a.full = dfixed_const(1);
8430         if (wm->vsc.full > a.full)
8431                 latency_tolerant_lines = 1;
8432         else {
8433                 if (lb_partitions <= (wm->vtaps + 1))
8434                         latency_tolerant_lines = 1;
8435                 else
8436                         latency_tolerant_lines = 2;
8437         }
8438
8439         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8440
8441         if (dce8_latency_watermark(wm) <= latency_hiding)
8442                 return true;
8443         else
8444                 return false;
8445 }
8446
8447 /**
8448  * dce8_program_watermarks - program display watermarks
8449  *
8450  * @rdev: radeon_device pointer
8451  * @radeon_crtc: the selected display controller
8452  * @lb_size: line buffer size
8453  * @num_heads: number of display controllers in use
8454  *
8455  * Calculate and program the display watermarks for the
8456  * selected display controller (CIK).
8457  */
8458 static void dce8_program_watermarks(struct radeon_device *rdev,
8459                                     struct radeon_crtc *radeon_crtc,
8460                                     u32 lb_size, u32 num_heads)
8461 {
8462         struct drm_display_mode *mode = &radeon_crtc->base.mode;
8463         struct dce8_wm_params wm_low, wm_high;
8464         u32 pixel_period;
8465         u32 line_time = 0;
8466         u32 latency_watermark_a = 0, latency_watermark_b = 0;
8467         u32 tmp, wm_mask;
8468
8469         if (radeon_crtc->base.enabled && num_heads && mode) {
8470                 pixel_period = 1000000 / (u32)mode->clock;
8471                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8472
8473                 /* watermark for high clocks */
8474                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8475                     rdev->pm.dpm_enabled) {
8476                         wm_high.yclk =
8477                                 radeon_dpm_get_mclk(rdev, false) * 10;
8478                         wm_high.sclk =
8479                                 radeon_dpm_get_sclk(rdev, false) * 10;
8480                 } else {
8481                         wm_high.yclk = rdev->pm.current_mclk * 10;
8482                         wm_high.sclk = rdev->pm.current_sclk * 10;
8483                 }
8484
8485                 wm_high.disp_clk = mode->clock;
8486                 wm_high.src_width = mode->crtc_hdisplay;
8487                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8488                 wm_high.blank_time = line_time - wm_high.active_time;
8489                 wm_high.interlaced = false;
8490                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8491                         wm_high.interlaced = true;
8492                 wm_high.vsc = radeon_crtc->vsc;
8493                 wm_high.vtaps = 1;
8494                 if (radeon_crtc->rmx_type != RMX_OFF)
8495                         wm_high.vtaps = 2;
8496                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8497                 wm_high.lb_size = lb_size;
8498                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8499                 wm_high.num_heads = num_heads;
8500
8501                 /* set for high clocks */
8502                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8503
8504                 /* possibly force display priority to high */
8505                 /* should really do this at mode validation time... */
8506                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8507                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8508                     !dce8_check_latency_hiding(&wm_high) ||
8509                     (rdev->disp_priority == 2)) {
8510                         DRM_DEBUG_KMS("force priority to high\n");
8511                 }
8512
8513                 /* watermark for low clocks */
8514                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8515                     rdev->pm.dpm_enabled) {
8516                         wm_low.yclk =
8517                                 radeon_dpm_get_mclk(rdev, true) * 10;
8518                         wm_low.sclk =
8519                                 radeon_dpm_get_sclk(rdev, true) * 10;
8520                 } else {
8521                         wm_low.yclk = rdev->pm.current_mclk * 10;
8522                         wm_low.sclk = rdev->pm.current_sclk * 10;
8523                 }
8524
8525                 wm_low.disp_clk = mode->clock;
8526                 wm_low.src_width = mode->crtc_hdisplay;
8527                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8528                 wm_low.blank_time = line_time - wm_low.active_time;
8529                 wm_low.interlaced = false;
8530                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8531                         wm_low.interlaced = true;
8532                 wm_low.vsc = radeon_crtc->vsc;
8533                 wm_low.vtaps = 1;
8534                 if (radeon_crtc->rmx_type != RMX_OFF)
8535                         wm_low.vtaps = 2;
8536                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8537                 wm_low.lb_size = lb_size;
8538                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8539                 wm_low.num_heads = num_heads;
8540
8541                 /* set for low clocks */
8542                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8543
8544                 /* possibly force display priority to high */
8545                 /* should really do this at mode validation time... */
8546                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8547                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8548                     !dce8_check_latency_hiding(&wm_low) ||
8549                     (rdev->disp_priority == 2)) {
8550                         DRM_DEBUG_KMS("force priority to high\n");
8551                 }
8552         }
8553
8554         /* select wm A */
8555         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8556         tmp = wm_mask;
8557         tmp &= ~LATENCY_WATERMARK_MASK(3);
8558         tmp |= LATENCY_WATERMARK_MASK(1);
8559         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8560         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8561                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8562                 LATENCY_HIGH_WATERMARK(line_time)));
8563         /* select wm B */
8564         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8565         tmp &= ~LATENCY_WATERMARK_MASK(3);
8566         tmp |= LATENCY_WATERMARK_MASK(2);
8567         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8568         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8569                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8570                 LATENCY_HIGH_WATERMARK(line_time)));
8571         /* restore original selection */
8572         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8573
8574         /* save values for DPM */
8575         radeon_crtc->line_time = line_time;
8576         radeon_crtc->wm_high = latency_watermark_a;
8577         radeon_crtc->wm_low = latency_watermark_b;
8578 }
8579
8580 /**
8581  * dce8_bandwidth_update - program display watermarks
8582  *
8583  * @rdev: radeon_device pointer
8584  *
8585  * Calculate and program the display watermarks and line
8586  * buffer allocation (CIK).
8587  */
8588 void dce8_bandwidth_update(struct radeon_device *rdev)
8589 {
8590         struct drm_display_mode *mode = NULL;
8591         u32 num_heads = 0, lb_size;
8592         int i;
8593
8594         radeon_update_display_priority(rdev);
8595
8596         for (i = 0; i < rdev->num_crtc; i++) {
8597                 if (rdev->mode_info.crtcs[i]->base.enabled)
8598                         num_heads++;
8599         }
8600         for (i = 0; i < rdev->num_crtc; i++) {
8601                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8602                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8603                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8604         }
8605 }
8606
8607 /**
8608  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8609  *
8610  * @rdev: radeon_device pointer
8611  *
8612  * Fetches a GPU clock counter snapshot (SI).
8613  * Returns the 64 bit clock counter snapshot.
8614  */
8615 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8616 {
8617         uint64_t clock;
8618
8619         mutex_lock(&rdev->gpu_clock_mutex);
8620         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8621         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8622                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8623         mutex_unlock(&rdev->gpu_clock_mutex);
8624         return clock;
8625 }
8626
8627 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8628                               u32 cntl_reg, u32 status_reg)
8629 {
8630         int r, i;
8631         struct atom_clock_dividers dividers;
8632         uint32_t tmp;
8633
8634         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8635                                            clock, false, &dividers);
8636         if (r)
8637                 return r;
8638
8639         tmp = RREG32_SMC(cntl_reg);
8640         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8641         tmp |= dividers.post_divider;
8642         WREG32_SMC(cntl_reg, tmp);
8643
8644         for (i = 0; i < 100; i++) {
8645                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8646                         break;
8647                 mdelay(10);
8648         }
8649         if (i == 100)
8650                 return -ETIMEDOUT;
8651
8652         return 0;
8653 }
8654
8655 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8656 {
8657         int r = 0;
8658
8659         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8660         if (r)
8661                 return r;
8662
8663         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8664         return r;
8665 }
8666
8667 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8668 {
8669         struct pci_dev *root = rdev->pdev->bus->self;
8670         int bridge_pos, gpu_pos;
8671         u32 speed_cntl, mask, current_data_rate;
8672         int ret, i;
8673         u16 tmp16;
8674
8675         if (radeon_pcie_gen2 == 0)
8676                 return;
8677
8678         if (rdev->flags & RADEON_IS_IGP)
8679                 return;
8680
8681         if (!(rdev->flags & RADEON_IS_PCIE))
8682                 return;
8683
8684         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8685         if (ret != 0)
8686                 return;
8687
8688         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8689                 return;
8690
8691         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8692         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8693                 LC_CURRENT_DATA_RATE_SHIFT;
8694         if (mask & DRM_PCIE_SPEED_80) {
8695                 if (current_data_rate == 2) {
8696                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8697                         return;
8698                 }
8699                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8700         } else if (mask & DRM_PCIE_SPEED_50) {
8701                 if (current_data_rate == 1) {
8702                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8703                         return;
8704                 }
8705                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8706         }
8707
8708         bridge_pos = pci_pcie_cap(root);
8709         if (!bridge_pos)
8710                 return;
8711
8712         gpu_pos = pci_pcie_cap(rdev->pdev);
8713         if (!gpu_pos)
8714                 return;
8715
8716         if (mask & DRM_PCIE_SPEED_80) {
8717                 /* re-try equalization if gen3 is not already enabled */
8718                 if (current_data_rate != 2) {
8719                         u16 bridge_cfg, gpu_cfg;
8720                         u16 bridge_cfg2, gpu_cfg2;
8721                         u32 max_lw, current_lw, tmp;
8722
8723                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8724                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8725
8726                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8727                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8728
8729                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8730                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8731
8732                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8733                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8734                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8735
8736                         if (current_lw < max_lw) {
8737                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8738                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8739                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8740                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8741                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8742                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8743                                 }
8744                         }
8745
8746                         for (i = 0; i < 10; i++) {
8747                                 /* check status */
8748                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8749                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8750                                         break;
8751
8752                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8753                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8754
8755                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8756                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8757
8758                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8759                                 tmp |= LC_SET_QUIESCE;
8760                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8761
8762                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8763                                 tmp |= LC_REDO_EQ;
8764                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8765
8766                                 mdelay(100);
8767
8768                                 /* linkctl */
8769                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8770                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8771                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8772                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8773
8774                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8775                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8776                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8777                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8778
8779                                 /* linkctl2 */
8780                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8781                                 tmp16 &= ~((1 << 4) | (7 << 9));
8782                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8783                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8784
8785                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8786                                 tmp16 &= ~((1 << 4) | (7 << 9));
8787                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8788                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8789
8790                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8791                                 tmp &= ~LC_SET_QUIESCE;
8792                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8793                         }
8794                 }
8795         }
8796
8797         /* set the link speed */
8798         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8799         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8800         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8801
8802         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8803         tmp16 &= ~0xf;
8804         if (mask & DRM_PCIE_SPEED_80)
8805                 tmp16 |= 3; /* gen3 */
8806         else if (mask & DRM_PCIE_SPEED_50)
8807                 tmp16 |= 2; /* gen2 */
8808         else
8809                 tmp16 |= 1; /* gen1 */
8810         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8811
8812         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8813         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8814         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8815
8816         for (i = 0; i < rdev->usec_timeout; i++) {
8817                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8818                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8819                         break;
8820                 udelay(1);
8821         }
8822 }
8823
8824 static void cik_program_aspm(struct radeon_device *rdev)
8825 {
8826         u32 data, orig;
8827         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8828         bool disable_clkreq = false;
8829
8830         if (radeon_aspm == 0)
8831                 return;
8832
8833         /* XXX double check IGPs */
8834         if (rdev->flags & RADEON_IS_IGP)
8835                 return;
8836
8837         if (!(rdev->flags & RADEON_IS_PCIE))
8838                 return;
8839
8840         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8841         data &= ~LC_XMIT_N_FTS_MASK;
8842         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8843         if (orig != data)
8844                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8845
8846         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8847         data |= LC_GO_TO_RECOVERY;
8848         if (orig != data)
8849                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8850
8851         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8852         data |= P_IGNORE_EDB_ERR;
8853         if (orig != data)
8854                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8855
8856         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8857         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8858         data |= LC_PMI_TO_L1_DIS;
8859         if (!disable_l0s)
8860                 data |= LC_L0S_INACTIVITY(7);
8861
8862         if (!disable_l1) {
8863                 data |= LC_L1_INACTIVITY(7);
8864                 data &= ~LC_PMI_TO_L1_DIS;
8865                 if (orig != data)
8866                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8867
8868                 if (!disable_plloff_in_l1) {
8869                         bool clk_req_support;
8870
8871                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8872                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8873                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8874                         if (orig != data)
8875                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8876
8877                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8878                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8879                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8880                         if (orig != data)
8881                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8882
8883                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8884                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8885                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8886                         if (orig != data)
8887                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8888
8889                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8890                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8891                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8892                         if (orig != data)
8893                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8894
8895                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8896                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8897                         data |= LC_DYN_LANES_PWR_STATE(3);
8898                         if (orig != data)
8899                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8900
8901                         if (!disable_clkreq) {
8902                                 struct pci_dev *root = rdev->pdev->bus->self;
8903                                 u32 lnkcap;
8904
8905                                 clk_req_support = false;
8906                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8907                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8908                                         clk_req_support = true;
8909                         } else {
8910                                 clk_req_support = false;
8911                         }
8912
8913                         if (clk_req_support) {
8914                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8915                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8916                                 if (orig != data)
8917                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8918
8919                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
8920                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8921                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8922                                 if (orig != data)
8923                                         WREG32_SMC(THM_CLK_CNTL, data);
8924
8925                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8926                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8927                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8928                                 if (orig != data)
8929                                         WREG32_SMC(MISC_CLK_CTRL, data);
8930
8931                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8932                                 data &= ~BCLK_AS_XCLK;
8933                                 if (orig != data)
8934                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
8935
8936                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8937                                 data &= ~FORCE_BIF_REFCLK_EN;
8938                                 if (orig != data)
8939                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8940
8941                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8942                                 data &= ~MPLL_CLKOUT_SEL_MASK;
8943                                 data |= MPLL_CLKOUT_SEL(4);
8944                                 if (orig != data)
8945                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8946                         }
8947                 }
8948         } else {
8949                 if (orig != data)
8950                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8951         }
8952
8953         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8954         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8955         if (orig != data)
8956                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8957
8958         if (!disable_l0s) {
8959                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8960                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8961                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8962                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8963                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8964                                 data &= ~LC_L0S_INACTIVITY_MASK;
8965                                 if (orig != data)
8966                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8967                         }
8968                 }
8969         }
8970 }