drivers/gpu/drm/radeon/cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
52 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
53 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
58 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
59 MODULE_FIRMWARE("radeon/KABINI_me.bin");
60 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
61 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
62 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
63 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
64
65 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
66 extern void r600_ih_ring_fini(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
70 extern void sumo_rlc_fini(struct radeon_device *rdev);
71 extern int sumo_rlc_init(struct radeon_device *rdev);
72 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
73 extern void si_rlc_reset(struct radeon_device *rdev);
74 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
75 extern int cik_sdma_resume(struct radeon_device *rdev);
76 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
77 extern void cik_sdma_fini(struct radeon_device *rdev);
78 static void cik_rlc_stop(struct radeon_device *rdev);
79 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
80 static void cik_program_aspm(struct radeon_device *rdev);
81 static void cik_init_pg(struct radeon_device *rdev);
82 static void cik_init_cg(struct radeon_device *rdev);
83 static void cik_fini_pg(struct radeon_device *rdev);
84 static void cik_fini_cg(struct radeon_device *rdev);
85 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
86                                           bool enable);
87
88 /* get temperature in millidegrees */
89 int ci_get_temp(struct radeon_device *rdev)
90 {
91         u32 temp;
92         int actual_temp = 0;
93
94         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
95                 CTF_TEMP_SHIFT;
96
97         if (temp & 0x200)
98                 actual_temp = 255;
99         else
100                 actual_temp = temp & 0x1ff;
101
102         actual_temp = actual_temp * 1000;
103
104         return actual_temp;
105 }
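/*
 * Worked example (a reading of the code above, not an extra hardware
 * contract): the low 9 bits (0x1ff) of the CTF_TEMP field carry the
 * temperature in degrees C and bit 0x200 forces a clamp to 255.  A raw
 * field value of 0x038 therefore decodes to 56 and the function returns
 * 56 * 1000 = 56000 millidegrees; with bit 0x200 set it returns 255000.
 */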
106
107 /* get temperature in millidegrees */
108 int kv_get_temp(struct radeon_device *rdev)
109 {
110         u32 temp;
111         int actual_temp = 0;
112
113         temp = RREG32_SMC(0xC0300E0C);
114
115         if (temp)
116                 actual_temp = (temp / 8) - 49;
117         else
118                 actual_temp = 0;
119
120         actual_temp = actual_temp * 1000;
121
122         return actual_temp;
123 }
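/*
 * Worked example for the KV/KB path (derived from the formula above): a
 * raw reading of 840 from the SMC register at 0xC0300E0C gives
 * 840 / 8 - 49 = 56, i.e. 56000 millidegrees.  The register address and
 * the "/ 8 - 49" conversion come straight from the code; nothing more is
 * documented here.
 */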
124
125 /*
126  * Indirect register accessors
127  */
128 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
129 {
130         unsigned long flags;
131         u32 r;
132
133         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
134         WREG32(PCIE_INDEX, reg);
135         (void)RREG32(PCIE_INDEX);
136         r = RREG32(PCIE_DATA);
137         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
138         return r;
139 }
140
141 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
142 {
143         unsigned long flags;
144
145         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
146         WREG32(PCIE_INDEX, reg);
147         (void)RREG32(PCIE_INDEX);
148         WREG32(PCIE_DATA, v);
149         (void)RREG32(PCIE_DATA);
150         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
151 }
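/*
 * Usage sketch (illustrative only; the register names below are made up,
 * not taken from this file): both accessors take pciep_idx_lock, program
 * PCIE_INDEX and read it back (presumably to post the write) before
 * touching PCIE_DATA, so a caller can do a plain read-modify-write
 * without any extra locking:
 *
 *	u32 tmp = cik_pciep_rreg(rdev, SOME_PCIE_PORT_REG);
 *	cik_pciep_wreg(rdev, SOME_PCIE_PORT_REG, tmp | SOME_BIT);
 */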
152
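/*
 * The two register lists below (spectre_... and kalindi_...) are handed
 * to the RLC save/restore machinery configured elsewhere in the CIK
 * code.  Judging purely from the data layout -- an inference, not a
 * statement from hardware documentation -- each entry packs a
 * broadcast/instance select in the upper 16 bits and a dword register
 * offset in the lower 16 bits, followed by a 0x00000000 placeholder,
 * while the bare 0x3 and 0x5 words separate sub-sections of the list.
 */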
153 static const u32 spectre_rlc_save_restore_register_list[] =
154 {
155         (0x0e00 << 16) | (0xc12c >> 2),
156         0x00000000,
157         (0x0e00 << 16) | (0xc140 >> 2),
158         0x00000000,
159         (0x0e00 << 16) | (0xc150 >> 2),
160         0x00000000,
161         (0x0e00 << 16) | (0xc15c >> 2),
162         0x00000000,
163         (0x0e00 << 16) | (0xc168 >> 2),
164         0x00000000,
165         (0x0e00 << 16) | (0xc170 >> 2),
166         0x00000000,
167         (0x0e00 << 16) | (0xc178 >> 2),
168         0x00000000,
169         (0x0e00 << 16) | (0xc204 >> 2),
170         0x00000000,
171         (0x0e00 << 16) | (0xc2b4 >> 2),
172         0x00000000,
173         (0x0e00 << 16) | (0xc2b8 >> 2),
174         0x00000000,
175         (0x0e00 << 16) | (0xc2bc >> 2),
176         0x00000000,
177         (0x0e00 << 16) | (0xc2c0 >> 2),
178         0x00000000,
179         (0x0e00 << 16) | (0x8228 >> 2),
180         0x00000000,
181         (0x0e00 << 16) | (0x829c >> 2),
182         0x00000000,
183         (0x0e00 << 16) | (0x869c >> 2),
184         0x00000000,
185         (0x0600 << 16) | (0x98f4 >> 2),
186         0x00000000,
187         (0x0e00 << 16) | (0x98f8 >> 2),
188         0x00000000,
189         (0x0e00 << 16) | (0x9900 >> 2),
190         0x00000000,
191         (0x0e00 << 16) | (0xc260 >> 2),
192         0x00000000,
193         (0x0e00 << 16) | (0x90e8 >> 2),
194         0x00000000,
195         (0x0e00 << 16) | (0x3c000 >> 2),
196         0x00000000,
197         (0x0e00 << 16) | (0x3c00c >> 2),
198         0x00000000,
199         (0x0e00 << 16) | (0x8c1c >> 2),
200         0x00000000,
201         (0x0e00 << 16) | (0x9700 >> 2),
202         0x00000000,
203         (0x0e00 << 16) | (0xcd20 >> 2),
204         0x00000000,
205         (0x4e00 << 16) | (0xcd20 >> 2),
206         0x00000000,
207         (0x5e00 << 16) | (0xcd20 >> 2),
208         0x00000000,
209         (0x6e00 << 16) | (0xcd20 >> 2),
210         0x00000000,
211         (0x7e00 << 16) | (0xcd20 >> 2),
212         0x00000000,
213         (0x8e00 << 16) | (0xcd20 >> 2),
214         0x00000000,
215         (0x9e00 << 16) | (0xcd20 >> 2),
216         0x00000000,
217         (0xae00 << 16) | (0xcd20 >> 2),
218         0x00000000,
219         (0xbe00 << 16) | (0xcd20 >> 2),
220         0x00000000,
221         (0x0e00 << 16) | (0x89bc >> 2),
222         0x00000000,
223         (0x0e00 << 16) | (0x8900 >> 2),
224         0x00000000,
225         0x3,
226         (0x0e00 << 16) | (0xc130 >> 2),
227         0x00000000,
228         (0x0e00 << 16) | (0xc134 >> 2),
229         0x00000000,
230         (0x0e00 << 16) | (0xc1fc >> 2),
231         0x00000000,
232         (0x0e00 << 16) | (0xc208 >> 2),
233         0x00000000,
234         (0x0e00 << 16) | (0xc264 >> 2),
235         0x00000000,
236         (0x0e00 << 16) | (0xc268 >> 2),
237         0x00000000,
238         (0x0e00 << 16) | (0xc26c >> 2),
239         0x00000000,
240         (0x0e00 << 16) | (0xc270 >> 2),
241         0x00000000,
242         (0x0e00 << 16) | (0xc274 >> 2),
243         0x00000000,
244         (0x0e00 << 16) | (0xc278 >> 2),
245         0x00000000,
246         (0x0e00 << 16) | (0xc27c >> 2),
247         0x00000000,
248         (0x0e00 << 16) | (0xc280 >> 2),
249         0x00000000,
250         (0x0e00 << 16) | (0xc284 >> 2),
251         0x00000000,
252         (0x0e00 << 16) | (0xc288 >> 2),
253         0x00000000,
254         (0x0e00 << 16) | (0xc28c >> 2),
255         0x00000000,
256         (0x0e00 << 16) | (0xc290 >> 2),
257         0x00000000,
258         (0x0e00 << 16) | (0xc294 >> 2),
259         0x00000000,
260         (0x0e00 << 16) | (0xc298 >> 2),
261         0x00000000,
262         (0x0e00 << 16) | (0xc29c >> 2),
263         0x00000000,
264         (0x0e00 << 16) | (0xc2a0 >> 2),
265         0x00000000,
266         (0x0e00 << 16) | (0xc2a4 >> 2),
267         0x00000000,
268         (0x0e00 << 16) | (0xc2a8 >> 2),
269         0x00000000,
270         (0x0e00 << 16) | (0xc2ac  >> 2),
271         0x00000000,
272         (0x0e00 << 16) | (0xc2b0 >> 2),
273         0x00000000,
274         (0x0e00 << 16) | (0x301d0 >> 2),
275         0x00000000,
276         (0x0e00 << 16) | (0x30238 >> 2),
277         0x00000000,
278         (0x0e00 << 16) | (0x30250 >> 2),
279         0x00000000,
280         (0x0e00 << 16) | (0x30254 >> 2),
281         0x00000000,
282         (0x0e00 << 16) | (0x30258 >> 2),
283         0x00000000,
284         (0x0e00 << 16) | (0x3025c >> 2),
285         0x00000000,
286         (0x4e00 << 16) | (0xc900 >> 2),
287         0x00000000,
288         (0x5e00 << 16) | (0xc900 >> 2),
289         0x00000000,
290         (0x6e00 << 16) | (0xc900 >> 2),
291         0x00000000,
292         (0x7e00 << 16) | (0xc900 >> 2),
293         0x00000000,
294         (0x8e00 << 16) | (0xc900 >> 2),
295         0x00000000,
296         (0x9e00 << 16) | (0xc900 >> 2),
297         0x00000000,
298         (0xae00 << 16) | (0xc900 >> 2),
299         0x00000000,
300         (0xbe00 << 16) | (0xc900 >> 2),
301         0x00000000,
302         (0x4e00 << 16) | (0xc904 >> 2),
303         0x00000000,
304         (0x5e00 << 16) | (0xc904 >> 2),
305         0x00000000,
306         (0x6e00 << 16) | (0xc904 >> 2),
307         0x00000000,
308         (0x7e00 << 16) | (0xc904 >> 2),
309         0x00000000,
310         (0x8e00 << 16) | (0xc904 >> 2),
311         0x00000000,
312         (0x9e00 << 16) | (0xc904 >> 2),
313         0x00000000,
314         (0xae00 << 16) | (0xc904 >> 2),
315         0x00000000,
316         (0xbe00 << 16) | (0xc904 >> 2),
317         0x00000000,
318         (0x4e00 << 16) | (0xc908 >> 2),
319         0x00000000,
320         (0x5e00 << 16) | (0xc908 >> 2),
321         0x00000000,
322         (0x6e00 << 16) | (0xc908 >> 2),
323         0x00000000,
324         (0x7e00 << 16) | (0xc908 >> 2),
325         0x00000000,
326         (0x8e00 << 16) | (0xc908 >> 2),
327         0x00000000,
328         (0x9e00 << 16) | (0xc908 >> 2),
329         0x00000000,
330         (0xae00 << 16) | (0xc908 >> 2),
331         0x00000000,
332         (0xbe00 << 16) | (0xc908 >> 2),
333         0x00000000,
334         (0x4e00 << 16) | (0xc90c >> 2),
335         0x00000000,
336         (0x5e00 << 16) | (0xc90c >> 2),
337         0x00000000,
338         (0x6e00 << 16) | (0xc90c >> 2),
339         0x00000000,
340         (0x7e00 << 16) | (0xc90c >> 2),
341         0x00000000,
342         (0x8e00 << 16) | (0xc90c >> 2),
343         0x00000000,
344         (0x9e00 << 16) | (0xc90c >> 2),
345         0x00000000,
346         (0xae00 << 16) | (0xc90c >> 2),
347         0x00000000,
348         (0xbe00 << 16) | (0xc90c >> 2),
349         0x00000000,
350         (0x4e00 << 16) | (0xc910 >> 2),
351         0x00000000,
352         (0x5e00 << 16) | (0xc910 >> 2),
353         0x00000000,
354         (0x6e00 << 16) | (0xc910 >> 2),
355         0x00000000,
356         (0x7e00 << 16) | (0xc910 >> 2),
357         0x00000000,
358         (0x8e00 << 16) | (0xc910 >> 2),
359         0x00000000,
360         (0x9e00 << 16) | (0xc910 >> 2),
361         0x00000000,
362         (0xae00 << 16) | (0xc910 >> 2),
363         0x00000000,
364         (0xbe00 << 16) | (0xc910 >> 2),
365         0x00000000,
366         (0x0e00 << 16) | (0xc99c >> 2),
367         0x00000000,
368         (0x0e00 << 16) | (0x9834 >> 2),
369         0x00000000,
370         (0x0000 << 16) | (0x30f00 >> 2),
371         0x00000000,
372         (0x0001 << 16) | (0x30f00 >> 2),
373         0x00000000,
374         (0x0000 << 16) | (0x30f04 >> 2),
375         0x00000000,
376         (0x0001 << 16) | (0x30f04 >> 2),
377         0x00000000,
378         (0x0000 << 16) | (0x30f08 >> 2),
379         0x00000000,
380         (0x0001 << 16) | (0x30f08 >> 2),
381         0x00000000,
382         (0x0000 << 16) | (0x30f0c >> 2),
383         0x00000000,
384         (0x0001 << 16) | (0x30f0c >> 2),
385         0x00000000,
386         (0x0600 << 16) | (0x9b7c >> 2),
387         0x00000000,
388         (0x0e00 << 16) | (0x8a14 >> 2),
389         0x00000000,
390         (0x0e00 << 16) | (0x8a18 >> 2),
391         0x00000000,
392         (0x0600 << 16) | (0x30a00 >> 2),
393         0x00000000,
394         (0x0e00 << 16) | (0x8bf0 >> 2),
395         0x00000000,
396         (0x0e00 << 16) | (0x8bcc >> 2),
397         0x00000000,
398         (0x0e00 << 16) | (0x8b24 >> 2),
399         0x00000000,
400         (0x0e00 << 16) | (0x30a04 >> 2),
401         0x00000000,
402         (0x0600 << 16) | (0x30a10 >> 2),
403         0x00000000,
404         (0x0600 << 16) | (0x30a14 >> 2),
405         0x00000000,
406         (0x0600 << 16) | (0x30a18 >> 2),
407         0x00000000,
408         (0x0600 << 16) | (0x30a2c >> 2),
409         0x00000000,
410         (0x0e00 << 16) | (0xc700 >> 2),
411         0x00000000,
412         (0x0e00 << 16) | (0xc704 >> 2),
413         0x00000000,
414         (0x0e00 << 16) | (0xc708 >> 2),
415         0x00000000,
416         (0x0e00 << 16) | (0xc768 >> 2),
417         0x00000000,
418         (0x0400 << 16) | (0xc770 >> 2),
419         0x00000000,
420         (0x0400 << 16) | (0xc774 >> 2),
421         0x00000000,
422         (0x0400 << 16) | (0xc778 >> 2),
423         0x00000000,
424         (0x0400 << 16) | (0xc77c >> 2),
425         0x00000000,
426         (0x0400 << 16) | (0xc780 >> 2),
427         0x00000000,
428         (0x0400 << 16) | (0xc784 >> 2),
429         0x00000000,
430         (0x0400 << 16) | (0xc788 >> 2),
431         0x00000000,
432         (0x0400 << 16) | (0xc78c >> 2),
433         0x00000000,
434         (0x0400 << 16) | (0xc798 >> 2),
435         0x00000000,
436         (0x0400 << 16) | (0xc79c >> 2),
437         0x00000000,
438         (0x0400 << 16) | (0xc7a0 >> 2),
439         0x00000000,
440         (0x0400 << 16) | (0xc7a4 >> 2),
441         0x00000000,
442         (0x0400 << 16) | (0xc7a8 >> 2),
443         0x00000000,
444         (0x0400 << 16) | (0xc7ac >> 2),
445         0x00000000,
446         (0x0400 << 16) | (0xc7b0 >> 2),
447         0x00000000,
448         (0x0400 << 16) | (0xc7b4 >> 2),
449         0x00000000,
450         (0x0e00 << 16) | (0x9100 >> 2),
451         0x00000000,
452         (0x0e00 << 16) | (0x3c010 >> 2),
453         0x00000000,
454         (0x0e00 << 16) | (0x92a8 >> 2),
455         0x00000000,
456         (0x0e00 << 16) | (0x92ac >> 2),
457         0x00000000,
458         (0x0e00 << 16) | (0x92b4 >> 2),
459         0x00000000,
460         (0x0e00 << 16) | (0x92b8 >> 2),
461         0x00000000,
462         (0x0e00 << 16) | (0x92bc >> 2),
463         0x00000000,
464         (0x0e00 << 16) | (0x92c0 >> 2),
465         0x00000000,
466         (0x0e00 << 16) | (0x92c4 >> 2),
467         0x00000000,
468         (0x0e00 << 16) | (0x92c8 >> 2),
469         0x00000000,
470         (0x0e00 << 16) | (0x92cc >> 2),
471         0x00000000,
472         (0x0e00 << 16) | (0x92d0 >> 2),
473         0x00000000,
474         (0x0e00 << 16) | (0x8c00 >> 2),
475         0x00000000,
476         (0x0e00 << 16) | (0x8c04 >> 2),
477         0x00000000,
478         (0x0e00 << 16) | (0x8c20 >> 2),
479         0x00000000,
480         (0x0e00 << 16) | (0x8c38 >> 2),
481         0x00000000,
482         (0x0e00 << 16) | (0x8c3c >> 2),
483         0x00000000,
484         (0x0e00 << 16) | (0xae00 >> 2),
485         0x00000000,
486         (0x0e00 << 16) | (0x9604 >> 2),
487         0x00000000,
488         (0x0e00 << 16) | (0xac08 >> 2),
489         0x00000000,
490         (0x0e00 << 16) | (0xac0c >> 2),
491         0x00000000,
492         (0x0e00 << 16) | (0xac10 >> 2),
493         0x00000000,
494         (0x0e00 << 16) | (0xac14 >> 2),
495         0x00000000,
496         (0x0e00 << 16) | (0xac58 >> 2),
497         0x00000000,
498         (0x0e00 << 16) | (0xac68 >> 2),
499         0x00000000,
500         (0x0e00 << 16) | (0xac6c >> 2),
501         0x00000000,
502         (0x0e00 << 16) | (0xac70 >> 2),
503         0x00000000,
504         (0x0e00 << 16) | (0xac74 >> 2),
505         0x00000000,
506         (0x0e00 << 16) | (0xac78 >> 2),
507         0x00000000,
508         (0x0e00 << 16) | (0xac7c >> 2),
509         0x00000000,
510         (0x0e00 << 16) | (0xac80 >> 2),
511         0x00000000,
512         (0x0e00 << 16) | (0xac84 >> 2),
513         0x00000000,
514         (0x0e00 << 16) | (0xac88 >> 2),
515         0x00000000,
516         (0x0e00 << 16) | (0xac8c >> 2),
517         0x00000000,
518         (0x0e00 << 16) | (0x970c >> 2),
519         0x00000000,
520         (0x0e00 << 16) | (0x9714 >> 2),
521         0x00000000,
522         (0x0e00 << 16) | (0x9718 >> 2),
523         0x00000000,
524         (0x0e00 << 16) | (0x971c >> 2),
525         0x00000000,
526         (0x0e00 << 16) | (0x31068 >> 2),
527         0x00000000,
528         (0x4e00 << 16) | (0x31068 >> 2),
529         0x00000000,
530         (0x5e00 << 16) | (0x31068 >> 2),
531         0x00000000,
532         (0x6e00 << 16) | (0x31068 >> 2),
533         0x00000000,
534         (0x7e00 << 16) | (0x31068 >> 2),
535         0x00000000,
536         (0x8e00 << 16) | (0x31068 >> 2),
537         0x00000000,
538         (0x9e00 << 16) | (0x31068 >> 2),
539         0x00000000,
540         (0xae00 << 16) | (0x31068 >> 2),
541         0x00000000,
542         (0xbe00 << 16) | (0x31068 >> 2),
543         0x00000000,
544         (0x0e00 << 16) | (0xcd10 >> 2),
545         0x00000000,
546         (0x0e00 << 16) | (0xcd14 >> 2),
547         0x00000000,
548         (0x0e00 << 16) | (0x88b0 >> 2),
549         0x00000000,
550         (0x0e00 << 16) | (0x88b4 >> 2),
551         0x00000000,
552         (0x0e00 << 16) | (0x88b8 >> 2),
553         0x00000000,
554         (0x0e00 << 16) | (0x88bc >> 2),
555         0x00000000,
556         (0x0400 << 16) | (0x89c0 >> 2),
557         0x00000000,
558         (0x0e00 << 16) | (0x88c4 >> 2),
559         0x00000000,
560         (0x0e00 << 16) | (0x88c8 >> 2),
561         0x00000000,
562         (0x0e00 << 16) | (0x88d0 >> 2),
563         0x00000000,
564         (0x0e00 << 16) | (0x88d4 >> 2),
565         0x00000000,
566         (0x0e00 << 16) | (0x88d8 >> 2),
567         0x00000000,
568         (0x0e00 << 16) | (0x8980 >> 2),
569         0x00000000,
570         (0x0e00 << 16) | (0x30938 >> 2),
571         0x00000000,
572         (0x0e00 << 16) | (0x3093c >> 2),
573         0x00000000,
574         (0x0e00 << 16) | (0x30940 >> 2),
575         0x00000000,
576         (0x0e00 << 16) | (0x89a0 >> 2),
577         0x00000000,
578         (0x0e00 << 16) | (0x30900 >> 2),
579         0x00000000,
580         (0x0e00 << 16) | (0x30904 >> 2),
581         0x00000000,
582         (0x0e00 << 16) | (0x89b4 >> 2),
583         0x00000000,
584         (0x0e00 << 16) | (0x3c210 >> 2),
585         0x00000000,
586         (0x0e00 << 16) | (0x3c214 >> 2),
587         0x00000000,
588         (0x0e00 << 16) | (0x3c218 >> 2),
589         0x00000000,
590         (0x0e00 << 16) | (0x8904 >> 2),
591         0x00000000,
592         0x5,
593         (0x0e00 << 16) | (0x8c28 >> 2),
594         (0x0e00 << 16) | (0x8c2c >> 2),
595         (0x0e00 << 16) | (0x8c30 >> 2),
596         (0x0e00 << 16) | (0x8c34 >> 2),
597         (0x0e00 << 16) | (0x9600 >> 2),
598 };
599
600 static const u32 kalindi_rlc_save_restore_register_list[] =
601 {
602         (0x0e00 << 16) | (0xc12c >> 2),
603         0x00000000,
604         (0x0e00 << 16) | (0xc140 >> 2),
605         0x00000000,
606         (0x0e00 << 16) | (0xc150 >> 2),
607         0x00000000,
608         (0x0e00 << 16) | (0xc15c >> 2),
609         0x00000000,
610         (0x0e00 << 16) | (0xc168 >> 2),
611         0x00000000,
612         (0x0e00 << 16) | (0xc170 >> 2),
613         0x00000000,
614         (0x0e00 << 16) | (0xc204 >> 2),
615         0x00000000,
616         (0x0e00 << 16) | (0xc2b4 >> 2),
617         0x00000000,
618         (0x0e00 << 16) | (0xc2b8 >> 2),
619         0x00000000,
620         (0x0e00 << 16) | (0xc2bc >> 2),
621         0x00000000,
622         (0x0e00 << 16) | (0xc2c0 >> 2),
623         0x00000000,
624         (0x0e00 << 16) | (0x8228 >> 2),
625         0x00000000,
626         (0x0e00 << 16) | (0x829c >> 2),
627         0x00000000,
628         (0x0e00 << 16) | (0x869c >> 2),
629         0x00000000,
630         (0x0600 << 16) | (0x98f4 >> 2),
631         0x00000000,
632         (0x0e00 << 16) | (0x98f8 >> 2),
633         0x00000000,
634         (0x0e00 << 16) | (0x9900 >> 2),
635         0x00000000,
636         (0x0e00 << 16) | (0xc260 >> 2),
637         0x00000000,
638         (0x0e00 << 16) | (0x90e8 >> 2),
639         0x00000000,
640         (0x0e00 << 16) | (0x3c000 >> 2),
641         0x00000000,
642         (0x0e00 << 16) | (0x3c00c >> 2),
643         0x00000000,
644         (0x0e00 << 16) | (0x8c1c >> 2),
645         0x00000000,
646         (0x0e00 << 16) | (0x9700 >> 2),
647         0x00000000,
648         (0x0e00 << 16) | (0xcd20 >> 2),
649         0x00000000,
650         (0x4e00 << 16) | (0xcd20 >> 2),
651         0x00000000,
652         (0x5e00 << 16) | (0xcd20 >> 2),
653         0x00000000,
654         (0x6e00 << 16) | (0xcd20 >> 2),
655         0x00000000,
656         (0x7e00 << 16) | (0xcd20 >> 2),
657         0x00000000,
658         (0x0e00 << 16) | (0x89bc >> 2),
659         0x00000000,
660         (0x0e00 << 16) | (0x8900 >> 2),
661         0x00000000,
662         0x3,
663         (0x0e00 << 16) | (0xc130 >> 2),
664         0x00000000,
665         (0x0e00 << 16) | (0xc134 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0xc1fc >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0xc208 >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0xc264 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0xc268 >> 2),
674         0x00000000,
675         (0x0e00 << 16) | (0xc26c >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0xc270 >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0xc274 >> 2),
680         0x00000000,
681         (0x0e00 << 16) | (0xc28c >> 2),
682         0x00000000,
683         (0x0e00 << 16) | (0xc290 >> 2),
684         0x00000000,
685         (0x0e00 << 16) | (0xc294 >> 2),
686         0x00000000,
687         (0x0e00 << 16) | (0xc298 >> 2),
688         0x00000000,
689         (0x0e00 << 16) | (0xc2a0 >> 2),
690         0x00000000,
691         (0x0e00 << 16) | (0xc2a4 >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0xc2a8 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0xc2ac >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0x301d0 >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0x30238 >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0x30250 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0x30254 >> 2),
704         0x00000000,
705         (0x0e00 << 16) | (0x30258 >> 2),
706         0x00000000,
707         (0x0e00 << 16) | (0x3025c >> 2),
708         0x00000000,
709         (0x4e00 << 16) | (0xc900 >> 2),
710         0x00000000,
711         (0x5e00 << 16) | (0xc900 >> 2),
712         0x00000000,
713         (0x6e00 << 16) | (0xc900 >> 2),
714         0x00000000,
715         (0x7e00 << 16) | (0xc900 >> 2),
716         0x00000000,
717         (0x4e00 << 16) | (0xc904 >> 2),
718         0x00000000,
719         (0x5e00 << 16) | (0xc904 >> 2),
720         0x00000000,
721         (0x6e00 << 16) | (0xc904 >> 2),
722         0x00000000,
723         (0x7e00 << 16) | (0xc904 >> 2),
724         0x00000000,
725         (0x4e00 << 16) | (0xc908 >> 2),
726         0x00000000,
727         (0x5e00 << 16) | (0xc908 >> 2),
728         0x00000000,
729         (0x6e00 << 16) | (0xc908 >> 2),
730         0x00000000,
731         (0x7e00 << 16) | (0xc908 >> 2),
732         0x00000000,
733         (0x4e00 << 16) | (0xc90c >> 2),
734         0x00000000,
735         (0x5e00 << 16) | (0xc90c >> 2),
736         0x00000000,
737         (0x6e00 << 16) | (0xc90c >> 2),
738         0x00000000,
739         (0x7e00 << 16) | (0xc90c >> 2),
740         0x00000000,
741         (0x4e00 << 16) | (0xc910 >> 2),
742         0x00000000,
743         (0x5e00 << 16) | (0xc910 >> 2),
744         0x00000000,
745         (0x6e00 << 16) | (0xc910 >> 2),
746         0x00000000,
747         (0x7e00 << 16) | (0xc910 >> 2),
748         0x00000000,
749         (0x0e00 << 16) | (0xc99c >> 2),
750         0x00000000,
751         (0x0e00 << 16) | (0x9834 >> 2),
752         0x00000000,
753         (0x0000 << 16) | (0x30f00 >> 2),
754         0x00000000,
755         (0x0000 << 16) | (0x30f04 >> 2),
756         0x00000000,
757         (0x0000 << 16) | (0x30f08 >> 2),
758         0x00000000,
759         (0x0000 << 16) | (0x30f0c >> 2),
760         0x00000000,
761         (0x0600 << 16) | (0x9b7c >> 2),
762         0x00000000,
763         (0x0e00 << 16) | (0x8a14 >> 2),
764         0x00000000,
765         (0x0e00 << 16) | (0x8a18 >> 2),
766         0x00000000,
767         (0x0600 << 16) | (0x30a00 >> 2),
768         0x00000000,
769         (0x0e00 << 16) | (0x8bf0 >> 2),
770         0x00000000,
771         (0x0e00 << 16) | (0x8bcc >> 2),
772         0x00000000,
773         (0x0e00 << 16) | (0x8b24 >> 2),
774         0x00000000,
775         (0x0e00 << 16) | (0x30a04 >> 2),
776         0x00000000,
777         (0x0600 << 16) | (0x30a10 >> 2),
778         0x00000000,
779         (0x0600 << 16) | (0x30a14 >> 2),
780         0x00000000,
781         (0x0600 << 16) | (0x30a18 >> 2),
782         0x00000000,
783         (0x0600 << 16) | (0x30a2c >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc700 >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc704 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc708 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc768 >> 2),
792         0x00000000,
793         (0x0400 << 16) | (0xc770 >> 2),
794         0x00000000,
795         (0x0400 << 16) | (0xc774 >> 2),
796         0x00000000,
797         (0x0400 << 16) | (0xc798 >> 2),
798         0x00000000,
799         (0x0400 << 16) | (0xc79c >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0x9100 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0x3c010 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0x8c00 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0x8c04 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0x8c20 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0x8c38 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x8c3c >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0xae00 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x9604 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0xac08 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0xac0c >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0xac10 >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0xac14 >> 2),
826         0x00000000,
827         (0x0e00 << 16) | (0xac58 >> 2),
828         0x00000000,
829         (0x0e00 << 16) | (0xac68 >> 2),
830         0x00000000,
831         (0x0e00 << 16) | (0xac6c >> 2),
832         0x00000000,
833         (0x0e00 << 16) | (0xac70 >> 2),
834         0x00000000,
835         (0x0e00 << 16) | (0xac74 >> 2),
836         0x00000000,
837         (0x0e00 << 16) | (0xac78 >> 2),
838         0x00000000,
839         (0x0e00 << 16) | (0xac7c >> 2),
840         0x00000000,
841         (0x0e00 << 16) | (0xac80 >> 2),
842         0x00000000,
843         (0x0e00 << 16) | (0xac84 >> 2),
844         0x00000000,
845         (0x0e00 << 16) | (0xac88 >> 2),
846         0x00000000,
847         (0x0e00 << 16) | (0xac8c >> 2),
848         0x00000000,
849         (0x0e00 << 16) | (0x970c >> 2),
850         0x00000000,
851         (0x0e00 << 16) | (0x9714 >> 2),
852         0x00000000,
853         (0x0e00 << 16) | (0x9718 >> 2),
854         0x00000000,
855         (0x0e00 << 16) | (0x971c >> 2),
856         0x00000000,
857         (0x0e00 << 16) | (0x31068 >> 2),
858         0x00000000,
859         (0x4e00 << 16) | (0x31068 >> 2),
860         0x00000000,
861         (0x5e00 << 16) | (0x31068 >> 2),
862         0x00000000,
863         (0x6e00 << 16) | (0x31068 >> 2),
864         0x00000000,
865         (0x7e00 << 16) | (0x31068 >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0xcd10 >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0xcd14 >> 2),
870         0x00000000,
871         (0x0e00 << 16) | (0x88b0 >> 2),
872         0x00000000,
873         (0x0e00 << 16) | (0x88b4 >> 2),
874         0x00000000,
875         (0x0e00 << 16) | (0x88b8 >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x88bc >> 2),
878         0x00000000,
879         (0x0400 << 16) | (0x89c0 >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x88c4 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x88c8 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x88d0 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x88d4 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x88d8 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x8980 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0x30938 >> 2),
894         0x00000000,
895         (0x0e00 << 16) | (0x3093c >> 2),
896         0x00000000,
897         (0x0e00 << 16) | (0x30940 >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0x89a0 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0x30900 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0x30904 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0x89b4 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0x3e1fc >> 2),
908         0x00000000,
909         (0x0e00 << 16) | (0x3c210 >> 2),
910         0x00000000,
911         (0x0e00 << 16) | (0x3c214 >> 2),
912         0x00000000,
913         (0x0e00 << 16) | (0x3c218 >> 2),
914         0x00000000,
915         (0x0e00 << 16) | (0x8904 >> 2),
916         0x00000000,
917         0x5,
918         (0x0e00 << 16) | (0x8c28 >> 2),
919         (0x0e00 << 16) | (0x8c2c >> 2),
920         (0x0e00 << 16) | (0x8c30 >> 2),
921         (0x0e00 << 16) | (0x8c34 >> 2),
922         (0x0e00 << 16) | (0x9600 >> 2),
923 };
924
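/*
 * The *_golden_* and *_mgcg_cgcg_init tables that follow are sequences
 * of { register offset, and_mask, or_mask } triplets; they are applied
 * by radeon_program_register_sequence() from cik_init_golden_registers()
 * further down.  A sketch of how each triplet is interpreted follows
 * that function.
 */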
925 static const u32 bonaire_golden_spm_registers[] =
926 {
927         0x30800, 0xe0ffffff, 0xe0000000
928 };
929
930 static const u32 bonaire_golden_common_registers[] =
931 {
932         0xc770, 0xffffffff, 0x00000800,
933         0xc774, 0xffffffff, 0x00000800,
934         0xc798, 0xffffffff, 0x00007fbf,
935         0xc79c, 0xffffffff, 0x00007faf
936 };
937
938 static const u32 bonaire_golden_registers[] =
939 {
940         0x3354, 0x00000333, 0x00000333,
941         0x3350, 0x000c0fc0, 0x00040200,
942         0x9a10, 0x00010000, 0x00058208,
943         0x3c000, 0xffff1fff, 0x00140000,
944         0x3c200, 0xfdfc0fff, 0x00000100,
945         0x3c234, 0x40000000, 0x40000200,
946         0x9830, 0xffffffff, 0x00000000,
947         0x9834, 0xf00fffff, 0x00000400,
948         0x9838, 0x0002021c, 0x00020200,
949         0xc78, 0x00000080, 0x00000000,
950         0x5bb0, 0x000000f0, 0x00000070,
951         0x5bc0, 0xf0311fff, 0x80300000,
952         0x98f8, 0x73773777, 0x12010001,
953         0x350c, 0x00810000, 0x408af000,
954         0x7030, 0x31000111, 0x00000011,
955         0x2f48, 0x73773777, 0x12010001,
956         0x220c, 0x00007fb6, 0x0021a1b1,
957         0x2210, 0x00007fb6, 0x002021b1,
958         0x2180, 0x00007fb6, 0x00002191,
959         0x2218, 0x00007fb6, 0x002121b1,
960         0x221c, 0x00007fb6, 0x002021b1,
961         0x21dc, 0x00007fb6, 0x00002191,
962         0x21e0, 0x00007fb6, 0x00002191,
963         0x3628, 0x0000003f, 0x0000000a,
964         0x362c, 0x0000003f, 0x0000000a,
965         0x2ae4, 0x00073ffe, 0x000022a2,
966         0x240c, 0x000007ff, 0x00000000,
967         0x8a14, 0xf000003f, 0x00000007,
968         0x8bf0, 0x00002001, 0x00000001,
969         0x8b24, 0xffffffff, 0x00ffffff,
970         0x30a04, 0x0000ff0f, 0x00000000,
971         0x28a4c, 0x07ffffff, 0x06000000,
972         0x4d8, 0x00000fff, 0x00000100,
973         0x3e78, 0x00000001, 0x00000002,
974         0x9100, 0x03000000, 0x0362c688,
975         0x8c00, 0x000000ff, 0x00000001,
976         0xe40, 0x00001fff, 0x00001fff,
977         0x9060, 0x0000007f, 0x00000020,
978         0x9508, 0x00010000, 0x00010000,
979         0xac14, 0x000003ff, 0x000000f3,
980         0xac0c, 0xffffffff, 0x00001032
981 };
982
983 static const u32 bonaire_mgcg_cgcg_init[] =
984 {
985         0xc420, 0xffffffff, 0xfffffffc,
986         0x30800, 0xffffffff, 0xe0000000,
987         0x3c2a0, 0xffffffff, 0x00000100,
988         0x3c208, 0xffffffff, 0x00000100,
989         0x3c2c0, 0xffffffff, 0xc0000100,
990         0x3c2c8, 0xffffffff, 0xc0000100,
991         0x3c2c4, 0xffffffff, 0xc0000100,
992         0x55e4, 0xffffffff, 0x00600100,
993         0x3c280, 0xffffffff, 0x00000100,
994         0x3c214, 0xffffffff, 0x06000100,
995         0x3c220, 0xffffffff, 0x00000100,
996         0x3c218, 0xffffffff, 0x06000100,
997         0x3c204, 0xffffffff, 0x00000100,
998         0x3c2e0, 0xffffffff, 0x00000100,
999         0x3c224, 0xffffffff, 0x00000100,
1000         0x3c200, 0xffffffff, 0x00000100,
1001         0x3c230, 0xffffffff, 0x00000100,
1002         0x3c234, 0xffffffff, 0x00000100,
1003         0x3c250, 0xffffffff, 0x00000100,
1004         0x3c254, 0xffffffff, 0x00000100,
1005         0x3c258, 0xffffffff, 0x00000100,
1006         0x3c25c, 0xffffffff, 0x00000100,
1007         0x3c260, 0xffffffff, 0x00000100,
1008         0x3c27c, 0xffffffff, 0x00000100,
1009         0x3c278, 0xffffffff, 0x00000100,
1010         0x3c210, 0xffffffff, 0x06000100,
1011         0x3c290, 0xffffffff, 0x00000100,
1012         0x3c274, 0xffffffff, 0x00000100,
1013         0x3c2b4, 0xffffffff, 0x00000100,
1014         0x3c2b0, 0xffffffff, 0x00000100,
1015         0x3c270, 0xffffffff, 0x00000100,
1016         0x30800, 0xffffffff, 0xe0000000,
1017         0x3c020, 0xffffffff, 0x00010000,
1018         0x3c024, 0xffffffff, 0x00030002,
1019         0x3c028, 0xffffffff, 0x00040007,
1020         0x3c02c, 0xffffffff, 0x00060005,
1021         0x3c030, 0xffffffff, 0x00090008,
1022         0x3c034, 0xffffffff, 0x00010000,
1023         0x3c038, 0xffffffff, 0x00030002,
1024         0x3c03c, 0xffffffff, 0x00040007,
1025         0x3c040, 0xffffffff, 0x00060005,
1026         0x3c044, 0xffffffff, 0x00090008,
1027         0x3c048, 0xffffffff, 0x00010000,
1028         0x3c04c, 0xffffffff, 0x00030002,
1029         0x3c050, 0xffffffff, 0x00040007,
1030         0x3c054, 0xffffffff, 0x00060005,
1031         0x3c058, 0xffffffff, 0x00090008,
1032         0x3c05c, 0xffffffff, 0x00010000,
1033         0x3c060, 0xffffffff, 0x00030002,
1034         0x3c064, 0xffffffff, 0x00040007,
1035         0x3c068, 0xffffffff, 0x00060005,
1036         0x3c06c, 0xffffffff, 0x00090008,
1037         0x3c070, 0xffffffff, 0x00010000,
1038         0x3c074, 0xffffffff, 0x00030002,
1039         0x3c078, 0xffffffff, 0x00040007,
1040         0x3c07c, 0xffffffff, 0x00060005,
1041         0x3c080, 0xffffffff, 0x00090008,
1042         0x3c084, 0xffffffff, 0x00010000,
1043         0x3c088, 0xffffffff, 0x00030002,
1044         0x3c08c, 0xffffffff, 0x00040007,
1045         0x3c090, 0xffffffff, 0x00060005,
1046         0x3c094, 0xffffffff, 0x00090008,
1047         0x3c098, 0xffffffff, 0x00010000,
1048         0x3c09c, 0xffffffff, 0x00030002,
1049         0x3c0a0, 0xffffffff, 0x00040007,
1050         0x3c0a4, 0xffffffff, 0x00060005,
1051         0x3c0a8, 0xffffffff, 0x00090008,
1052         0x3c000, 0xffffffff, 0x96e00200,
1053         0x8708, 0xffffffff, 0x00900100,
1054         0xc424, 0xffffffff, 0x0020003f,
1055         0x38, 0xffffffff, 0x0140001c,
1056         0x3c, 0x000f0000, 0x000f0000,
1057         0x220, 0xffffffff, 0xC060000C,
1058         0x224, 0xc0000fff, 0x00000100,
1059         0xf90, 0xffffffff, 0x00000100,
1060         0xf98, 0x00000101, 0x00000000,
1061         0x20a8, 0xffffffff, 0x00000104,
1062         0x55e4, 0xff000fff, 0x00000100,
1063         0x30cc, 0xc0000fff, 0x00000104,
1064         0xc1e4, 0x00000001, 0x00000001,
1065         0xd00c, 0xff000ff0, 0x00000100,
1066         0xd80c, 0xff000ff0, 0x00000100
1067 };
1068
1069 static const u32 spectre_golden_spm_registers[] =
1070 {
1071         0x30800, 0xe0ffffff, 0xe0000000
1072 };
1073
1074 static const u32 spectre_golden_common_registers[] =
1075 {
1076         0xc770, 0xffffffff, 0x00000800,
1077         0xc774, 0xffffffff, 0x00000800,
1078         0xc798, 0xffffffff, 0x00007fbf,
1079         0xc79c, 0xffffffff, 0x00007faf
1080 };
1081
1082 static const u32 spectre_golden_registers[] =
1083 {
1084         0x3c000, 0xffff1fff, 0x96940200,
1085         0x3c00c, 0xffff0001, 0xff000000,
1086         0x3c200, 0xfffc0fff, 0x00000100,
1087         0x6ed8, 0x00010101, 0x00010000,
1088         0x9834, 0xf00fffff, 0x00000400,
1089         0x9838, 0xfffffffc, 0x00020200,
1090         0x5bb0, 0x000000f0, 0x00000070,
1091         0x5bc0, 0xf0311fff, 0x80300000,
1092         0x98f8, 0x73773777, 0x12010001,
1093         0x9b7c, 0x00ff0000, 0x00fc0000,
1094         0x2f48, 0x73773777, 0x12010001,
1095         0x8a14, 0xf000003f, 0x00000007,
1096         0x8b24, 0xffffffff, 0x00ffffff,
1097         0x28350, 0x3f3f3fff, 0x00000082,
1098         0x28355, 0x0000003f, 0x00000000,
1099         0x3e78, 0x00000001, 0x00000002,
1100         0x913c, 0xffff03df, 0x00000004,
1101         0xc768, 0x00000008, 0x00000008,
1102         0x8c00, 0x000008ff, 0x00000800,
1103         0x9508, 0x00010000, 0x00010000,
1104         0xac0c, 0xffffffff, 0x54763210,
1105         0x214f8, 0x01ff01ff, 0x00000002,
1106         0x21498, 0x007ff800, 0x00200000,
1107         0x2015c, 0xffffffff, 0x00000f40,
1108         0x30934, 0xffffffff, 0x00000001
1109 };
1110
1111 static const u32 spectre_mgcg_cgcg_init[] =
1112 {
1113         0xc420, 0xffffffff, 0xfffffffc,
1114         0x30800, 0xffffffff, 0xe0000000,
1115         0x3c2a0, 0xffffffff, 0x00000100,
1116         0x3c208, 0xffffffff, 0x00000100,
1117         0x3c2c0, 0xffffffff, 0x00000100,
1118         0x3c2c8, 0xffffffff, 0x00000100,
1119         0x3c2c4, 0xffffffff, 0x00000100,
1120         0x55e4, 0xffffffff, 0x00600100,
1121         0x3c280, 0xffffffff, 0x00000100,
1122         0x3c214, 0xffffffff, 0x06000100,
1123         0x3c220, 0xffffffff, 0x00000100,
1124         0x3c218, 0xffffffff, 0x06000100,
1125         0x3c204, 0xffffffff, 0x00000100,
1126         0x3c2e0, 0xffffffff, 0x00000100,
1127         0x3c224, 0xffffffff, 0x00000100,
1128         0x3c200, 0xffffffff, 0x00000100,
1129         0x3c230, 0xffffffff, 0x00000100,
1130         0x3c234, 0xffffffff, 0x00000100,
1131         0x3c250, 0xffffffff, 0x00000100,
1132         0x3c254, 0xffffffff, 0x00000100,
1133         0x3c258, 0xffffffff, 0x00000100,
1134         0x3c25c, 0xffffffff, 0x00000100,
1135         0x3c260, 0xffffffff, 0x00000100,
1136         0x3c27c, 0xffffffff, 0x00000100,
1137         0x3c278, 0xffffffff, 0x00000100,
1138         0x3c210, 0xffffffff, 0x06000100,
1139         0x3c290, 0xffffffff, 0x00000100,
1140         0x3c274, 0xffffffff, 0x00000100,
1141         0x3c2b4, 0xffffffff, 0x00000100,
1142         0x3c2b0, 0xffffffff, 0x00000100,
1143         0x3c270, 0xffffffff, 0x00000100,
1144         0x30800, 0xffffffff, 0xe0000000,
1145         0x3c020, 0xffffffff, 0x00010000,
1146         0x3c024, 0xffffffff, 0x00030002,
1147         0x3c028, 0xffffffff, 0x00040007,
1148         0x3c02c, 0xffffffff, 0x00060005,
1149         0x3c030, 0xffffffff, 0x00090008,
1150         0x3c034, 0xffffffff, 0x00010000,
1151         0x3c038, 0xffffffff, 0x00030002,
1152         0x3c03c, 0xffffffff, 0x00040007,
1153         0x3c040, 0xffffffff, 0x00060005,
1154         0x3c044, 0xffffffff, 0x00090008,
1155         0x3c048, 0xffffffff, 0x00010000,
1156         0x3c04c, 0xffffffff, 0x00030002,
1157         0x3c050, 0xffffffff, 0x00040007,
1158         0x3c054, 0xffffffff, 0x00060005,
1159         0x3c058, 0xffffffff, 0x00090008,
1160         0x3c05c, 0xffffffff, 0x00010000,
1161         0x3c060, 0xffffffff, 0x00030002,
1162         0x3c064, 0xffffffff, 0x00040007,
1163         0x3c068, 0xffffffff, 0x00060005,
1164         0x3c06c, 0xffffffff, 0x00090008,
1165         0x3c070, 0xffffffff, 0x00010000,
1166         0x3c074, 0xffffffff, 0x00030002,
1167         0x3c078, 0xffffffff, 0x00040007,
1168         0x3c07c, 0xffffffff, 0x00060005,
1169         0x3c080, 0xffffffff, 0x00090008,
1170         0x3c084, 0xffffffff, 0x00010000,
1171         0x3c088, 0xffffffff, 0x00030002,
1172         0x3c08c, 0xffffffff, 0x00040007,
1173         0x3c090, 0xffffffff, 0x00060005,
1174         0x3c094, 0xffffffff, 0x00090008,
1175         0x3c098, 0xffffffff, 0x00010000,
1176         0x3c09c, 0xffffffff, 0x00030002,
1177         0x3c0a0, 0xffffffff, 0x00040007,
1178         0x3c0a4, 0xffffffff, 0x00060005,
1179         0x3c0a8, 0xffffffff, 0x00090008,
1180         0x3c0ac, 0xffffffff, 0x00010000,
1181         0x3c0b0, 0xffffffff, 0x00030002,
1182         0x3c0b4, 0xffffffff, 0x00040007,
1183         0x3c0b8, 0xffffffff, 0x00060005,
1184         0x3c0bc, 0xffffffff, 0x00090008,
1185         0x3c000, 0xffffffff, 0x96e00200,
1186         0x8708, 0xffffffff, 0x00900100,
1187         0xc424, 0xffffffff, 0x0020003f,
1188         0x38, 0xffffffff, 0x0140001c,
1189         0x3c, 0x000f0000, 0x000f0000,
1190         0x220, 0xffffffff, 0xC060000C,
1191         0x224, 0xc0000fff, 0x00000100,
1192         0xf90, 0xffffffff, 0x00000100,
1193         0xf98, 0x00000101, 0x00000000,
1194         0x20a8, 0xffffffff, 0x00000104,
1195         0x55e4, 0xff000fff, 0x00000100,
1196         0x30cc, 0xc0000fff, 0x00000104,
1197         0xc1e4, 0x00000001, 0x00000001,
1198         0xd00c, 0xff000ff0, 0x00000100,
1199         0xd80c, 0xff000ff0, 0x00000100
1200 };
1201
1202 static const u32 kalindi_golden_spm_registers[] =
1203 {
1204         0x30800, 0xe0ffffff, 0xe0000000
1205 };
1206
1207 static const u32 kalindi_golden_common_registers[] =
1208 {
1209         0xc770, 0xffffffff, 0x00000800,
1210         0xc774, 0xffffffff, 0x00000800,
1211         0xc798, 0xffffffff, 0x00007fbf,
1212         0xc79c, 0xffffffff, 0x00007faf
1213 };
1214
1215 static const u32 kalindi_golden_registers[] =
1216 {
1217         0x3c000, 0xffffdfff, 0x6e944040,
1218         0x55e4, 0xff607fff, 0xfc000100,
1219         0x3c220, 0xff000fff, 0x00000100,
1220         0x3c224, 0xff000fff, 0x00000100,
1221         0x3c200, 0xfffc0fff, 0x00000100,
1222         0x6ed8, 0x00010101, 0x00010000,
1223         0x9830, 0xffffffff, 0x00000000,
1224         0x9834, 0xf00fffff, 0x00000400,
1225         0x5bb0, 0x000000f0, 0x00000070,
1226         0x5bc0, 0xf0311fff, 0x80300000,
1227         0x98f8, 0x73773777, 0x12010001,
1228         0x98fc, 0xffffffff, 0x00000010,
1229         0x9b7c, 0x00ff0000, 0x00fc0000,
1230         0x8030, 0x00001f0f, 0x0000100a,
1231         0x2f48, 0x73773777, 0x12010001,
1232         0x2408, 0x000fffff, 0x000c007f,
1233         0x8a14, 0xf000003f, 0x00000007,
1234         0x8b24, 0x3fff3fff, 0x00ffcfff,
1235         0x30a04, 0x0000ff0f, 0x00000000,
1236         0x28a4c, 0x07ffffff, 0x06000000,
1237         0x4d8, 0x00000fff, 0x00000100,
1238         0x3e78, 0x00000001, 0x00000002,
1239         0xc768, 0x00000008, 0x00000008,
1240         0x8c00, 0x000000ff, 0x00000003,
1241         0x214f8, 0x01ff01ff, 0x00000002,
1242         0x21498, 0x007ff800, 0x00200000,
1243         0x2015c, 0xffffffff, 0x00000f40,
1244         0x88c4, 0x001f3ae3, 0x00000082,
1245         0x88d4, 0x0000001f, 0x00000010,
1246         0x30934, 0xffffffff, 0x00000000
1247 };
1248
1249 static const u32 kalindi_mgcg_cgcg_init[] =
1250 {
1251         0xc420, 0xffffffff, 0xfffffffc,
1252         0x30800, 0xffffffff, 0xe0000000,
1253         0x3c2a0, 0xffffffff, 0x00000100,
1254         0x3c208, 0xffffffff, 0x00000100,
1255         0x3c2c0, 0xffffffff, 0x00000100,
1256         0x3c2c8, 0xffffffff, 0x00000100,
1257         0x3c2c4, 0xffffffff, 0x00000100,
1258         0x55e4, 0xffffffff, 0x00600100,
1259         0x3c280, 0xffffffff, 0x00000100,
1260         0x3c214, 0xffffffff, 0x06000100,
1261         0x3c220, 0xffffffff, 0x00000100,
1262         0x3c218, 0xffffffff, 0x06000100,
1263         0x3c204, 0xffffffff, 0x00000100,
1264         0x3c2e0, 0xffffffff, 0x00000100,
1265         0x3c224, 0xffffffff, 0x00000100,
1266         0x3c200, 0xffffffff, 0x00000100,
1267         0x3c230, 0xffffffff, 0x00000100,
1268         0x3c234, 0xffffffff, 0x00000100,
1269         0x3c250, 0xffffffff, 0x00000100,
1270         0x3c254, 0xffffffff, 0x00000100,
1271         0x3c258, 0xffffffff, 0x00000100,
1272         0x3c25c, 0xffffffff, 0x00000100,
1273         0x3c260, 0xffffffff, 0x00000100,
1274         0x3c27c, 0xffffffff, 0x00000100,
1275         0x3c278, 0xffffffff, 0x00000100,
1276         0x3c210, 0xffffffff, 0x06000100,
1277         0x3c290, 0xffffffff, 0x00000100,
1278         0x3c274, 0xffffffff, 0x00000100,
1279         0x3c2b4, 0xffffffff, 0x00000100,
1280         0x3c2b0, 0xffffffff, 0x00000100,
1281         0x3c270, 0xffffffff, 0x00000100,
1282         0x30800, 0xffffffff, 0xe0000000,
1283         0x3c020, 0xffffffff, 0x00010000,
1284         0x3c024, 0xffffffff, 0x00030002,
1285         0x3c028, 0xffffffff, 0x00040007,
1286         0x3c02c, 0xffffffff, 0x00060005,
1287         0x3c030, 0xffffffff, 0x00090008,
1288         0x3c034, 0xffffffff, 0x00010000,
1289         0x3c038, 0xffffffff, 0x00030002,
1290         0x3c03c, 0xffffffff, 0x00040007,
1291         0x3c040, 0xffffffff, 0x00060005,
1292         0x3c044, 0xffffffff, 0x00090008,
1293         0x3c000, 0xffffffff, 0x96e00200,
1294         0x8708, 0xffffffff, 0x00900100,
1295         0xc424, 0xffffffff, 0x0020003f,
1296         0x38, 0xffffffff, 0x0140001c,
1297         0x3c, 0x000f0000, 0x000f0000,
1298         0x220, 0xffffffff, 0xC060000C,
1299         0x224, 0xc0000fff, 0x00000100,
1300         0x20a8, 0xffffffff, 0x00000104,
1301         0x55e4, 0xff000fff, 0x00000100,
1302         0x30cc, 0xc0000fff, 0x00000104,
1303         0xc1e4, 0x00000001, 0x00000001,
1304         0xd00c, 0xff000ff0, 0x00000100,
1305         0xd80c, 0xff000ff0, 0x00000100
1306 };
1307
1308 static const u32 hawaii_golden_spm_registers[] =
1309 {
1310         0x30800, 0xe0ffffff, 0xe0000000
1311 };
1312
1313 static const u32 hawaii_golden_common_registers[] =
1314 {
1315         0x30800, 0xffffffff, 0xe0000000,
1316         0x28350, 0xffffffff, 0x3a00161a,
1317         0x28354, 0xffffffff, 0x0000002e,
1318         0x9a10, 0xffffffff, 0x00018208,
1319         0x98f8, 0xffffffff, 0x12011003
1320 };
1321
1322 static const u32 hawaii_golden_registers[] =
1323 {
1324         0x3354, 0x00000333, 0x00000333,
1325         0x9a10, 0x00010000, 0x00058208,
1326         0x9830, 0xffffffff, 0x00000000,
1327         0x9834, 0xf00fffff, 0x00000400,
1328         0x9838, 0x0002021c, 0x00020200,
1329         0xc78, 0x00000080, 0x00000000,
1330         0x5bb0, 0x000000f0, 0x00000070,
1331         0x5bc0, 0xf0311fff, 0x80300000,
1332         0x350c, 0x00810000, 0x408af000,
1333         0x7030, 0x31000111, 0x00000011,
1334         0x2f48, 0x73773777, 0x12010001,
1335         0x2120, 0x0000007f, 0x0000001b,
1336         0x21dc, 0x00007fb6, 0x00002191,
1337         0x3628, 0x0000003f, 0x0000000a,
1338         0x362c, 0x0000003f, 0x0000000a,
1339         0x2ae4, 0x00073ffe, 0x000022a2,
1340         0x240c, 0x000007ff, 0x00000000,
1341         0x8bf0, 0x00002001, 0x00000001,
1342         0x8b24, 0xffffffff, 0x00ffffff,
1343         0x30a04, 0x0000ff0f, 0x00000000,
1344         0x28a4c, 0x07ffffff, 0x06000000,
1345         0x3e78, 0x00000001, 0x00000002,
1346         0xc768, 0x00000008, 0x00000008,
1347         0xc770, 0x00000f00, 0x00000800,
1348         0xc774, 0x00000f00, 0x00000800,
1349         0xc798, 0x00ffffff, 0x00ff7fbf,
1350         0xc79c, 0x00ffffff, 0x00ff7faf,
1351         0x8c00, 0x000000ff, 0x00000800,
1352         0xe40, 0x00001fff, 0x00001fff,
1353         0x9060, 0x0000007f, 0x00000020,
1354         0x9508, 0x00010000, 0x00010000,
1355         0xae00, 0x00100000, 0x000ff07c,
1356         0xac14, 0x000003ff, 0x0000000f,
1357         0xac10, 0xffffffff, 0x7564fdec,
1358         0xac0c, 0xffffffff, 0x3120b9a8,
1359         0xac08, 0x20000000, 0x0f9c0000
1360 };
1361
1362 static const u32 hawaii_mgcg_cgcg_init[] =
1363 {
1364         0xc420, 0xffffffff, 0xfffffffd,
1365         0x30800, 0xffffffff, 0xe0000000,
1366         0x3c2a0, 0xffffffff, 0x00000100,
1367         0x3c208, 0xffffffff, 0x00000100,
1368         0x3c2c0, 0xffffffff, 0x00000100,
1369         0x3c2c8, 0xffffffff, 0x00000100,
1370         0x3c2c4, 0xffffffff, 0x00000100,
1371         0x55e4, 0xffffffff, 0x00200100,
1372         0x3c280, 0xffffffff, 0x00000100,
1373         0x3c214, 0xffffffff, 0x06000100,
1374         0x3c220, 0xffffffff, 0x00000100,
1375         0x3c218, 0xffffffff, 0x06000100,
1376         0x3c204, 0xffffffff, 0x00000100,
1377         0x3c2e0, 0xffffffff, 0x00000100,
1378         0x3c224, 0xffffffff, 0x00000100,
1379         0x3c200, 0xffffffff, 0x00000100,
1380         0x3c230, 0xffffffff, 0x00000100,
1381         0x3c234, 0xffffffff, 0x00000100,
1382         0x3c250, 0xffffffff, 0x00000100,
1383         0x3c254, 0xffffffff, 0x00000100,
1384         0x3c258, 0xffffffff, 0x00000100,
1385         0x3c25c, 0xffffffff, 0x00000100,
1386         0x3c260, 0xffffffff, 0x00000100,
1387         0x3c27c, 0xffffffff, 0x00000100,
1388         0x3c278, 0xffffffff, 0x00000100,
1389         0x3c210, 0xffffffff, 0x06000100,
1390         0x3c290, 0xffffffff, 0x00000100,
1391         0x3c274, 0xffffffff, 0x00000100,
1392         0x3c2b4, 0xffffffff, 0x00000100,
1393         0x3c2b0, 0xffffffff, 0x00000100,
1394         0x3c270, 0xffffffff, 0x00000100,
1395         0x30800, 0xffffffff, 0xe0000000,
1396         0x3c020, 0xffffffff, 0x00010000,
1397         0x3c024, 0xffffffff, 0x00030002,
1398         0x3c028, 0xffffffff, 0x00040007,
1399         0x3c02c, 0xffffffff, 0x00060005,
1400         0x3c030, 0xffffffff, 0x00090008,
1401         0x3c034, 0xffffffff, 0x00010000,
1402         0x3c038, 0xffffffff, 0x00030002,
1403         0x3c03c, 0xffffffff, 0x00040007,
1404         0x3c040, 0xffffffff, 0x00060005,
1405         0x3c044, 0xffffffff, 0x00090008,
1406         0x3c048, 0xffffffff, 0x00010000,
1407         0x3c04c, 0xffffffff, 0x00030002,
1408         0x3c050, 0xffffffff, 0x00040007,
1409         0x3c054, 0xffffffff, 0x00060005,
1410         0x3c058, 0xffffffff, 0x00090008,
1411         0x3c05c, 0xffffffff, 0x00010000,
1412         0x3c060, 0xffffffff, 0x00030002,
1413         0x3c064, 0xffffffff, 0x00040007,
1414         0x3c068, 0xffffffff, 0x00060005,
1415         0x3c06c, 0xffffffff, 0x00090008,
1416         0x3c070, 0xffffffff, 0x00010000,
1417         0x3c074, 0xffffffff, 0x00030002,
1418         0x3c078, 0xffffffff, 0x00040007,
1419         0x3c07c, 0xffffffff, 0x00060005,
1420         0x3c080, 0xffffffff, 0x00090008,
1421         0x3c084, 0xffffffff, 0x00010000,
1422         0x3c088, 0xffffffff, 0x00030002,
1423         0x3c08c, 0xffffffff, 0x00040007,
1424         0x3c090, 0xffffffff, 0x00060005,
1425         0x3c094, 0xffffffff, 0x00090008,
1426         0x3c098, 0xffffffff, 0x00010000,
1427         0x3c09c, 0xffffffff, 0x00030002,
1428         0x3c0a0, 0xffffffff, 0x00040007,
1429         0x3c0a4, 0xffffffff, 0x00060005,
1430         0x3c0a8, 0xffffffff, 0x00090008,
1431         0x3c0ac, 0xffffffff, 0x00010000,
1432         0x3c0b0, 0xffffffff, 0x00030002,
1433         0x3c0b4, 0xffffffff, 0x00040007,
1434         0x3c0b8, 0xffffffff, 0x00060005,
1435         0x3c0bc, 0xffffffff, 0x00090008,
1436         0x3c0c0, 0xffffffff, 0x00010000,
1437         0x3c0c4, 0xffffffff, 0x00030002,
1438         0x3c0c8, 0xffffffff, 0x00040007,
1439         0x3c0cc, 0xffffffff, 0x00060005,
1440         0x3c0d0, 0xffffffff, 0x00090008,
1441         0x3c0d4, 0xffffffff, 0x00010000,
1442         0x3c0d8, 0xffffffff, 0x00030002,
1443         0x3c0dc, 0xffffffff, 0x00040007,
1444         0x3c0e0, 0xffffffff, 0x00060005,
1445         0x3c0e4, 0xffffffff, 0x00090008,
1446         0x3c0e8, 0xffffffff, 0x00010000,
1447         0x3c0ec, 0xffffffff, 0x00030002,
1448         0x3c0f0, 0xffffffff, 0x00040007,
1449         0x3c0f4, 0xffffffff, 0x00060005,
1450         0x3c0f8, 0xffffffff, 0x00090008,
1451         0xc318, 0xffffffff, 0x00020200,
1452         0x3350, 0xffffffff, 0x00000200,
1453         0x15c0, 0xffffffff, 0x00000400,
1454         0x55e8, 0xffffffff, 0x00000000,
1455         0x2f50, 0xffffffff, 0x00000902,
1456         0x3c000, 0xffffffff, 0x96940200,
1457         0x8708, 0xffffffff, 0x00900100,
1458         0xc424, 0xffffffff, 0x0020003f,
1459         0x38, 0xffffffff, 0x0140001c,
1460         0x3c, 0x000f0000, 0x000f0000,
1461         0x220, 0xffffffff, 0xc060000c,
1462         0x224, 0xc0000fff, 0x00000100,
1463         0xf90, 0xffffffff, 0x00000100,
1464         0xf98, 0x00000101, 0x00000000,
1465         0x20a8, 0xffffffff, 0x00000104,
1466         0x55e4, 0xff000fff, 0x00000100,
1467         0x30cc, 0xc0000fff, 0x00000104,
1468         0xc1e4, 0x00000001, 0x00000001,
1469         0xd00c, 0xff000ff0, 0x00000100,
1470         0xd80c, 0xff000ff0, 0x00000100
1471 };
1472
1473 static void cik_init_golden_registers(struct radeon_device *rdev)
1474 {
1475         switch (rdev->family) {
1476         case CHIP_BONAIRE:
1477                 radeon_program_register_sequence(rdev,
1478                                                  bonaire_mgcg_cgcg_init,
1479                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1480                 radeon_program_register_sequence(rdev,
1481                                                  bonaire_golden_registers,
1482                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1483                 radeon_program_register_sequence(rdev,
1484                                                  bonaire_golden_common_registers,
1485                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1486                 radeon_program_register_sequence(rdev,
1487                                                  bonaire_golden_spm_registers,
1488                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1489                 break;
1490         case CHIP_KABINI:
1491                 radeon_program_register_sequence(rdev,
1492                                                  kalindi_mgcg_cgcg_init,
1493                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1494                 radeon_program_register_sequence(rdev,
1495                                                  kalindi_golden_registers,
1496                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1497                 radeon_program_register_sequence(rdev,
1498                                                  kalindi_golden_common_registers,
1499                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1500                 radeon_program_register_sequence(rdev,
1501                                                  kalindi_golden_spm_registers,
1502                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1503                 break;
1504         case CHIP_KAVERI:
1505                 radeon_program_register_sequence(rdev,
1506                                                  spectre_mgcg_cgcg_init,
1507                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1508                 radeon_program_register_sequence(rdev,
1509                                                  spectre_golden_registers,
1510                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1511                 radeon_program_register_sequence(rdev,
1512                                                  spectre_golden_common_registers,
1513                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1514                 radeon_program_register_sequence(rdev,
1515                                                  spectre_golden_spm_registers,
1516                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1517                 break;
1518         case CHIP_HAWAII:
1519                 radeon_program_register_sequence(rdev,
1520                                                  hawaii_mgcg_cgcg_init,
1521                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1522                 radeon_program_register_sequence(rdev,
1523                                                  hawaii_golden_registers,
1524                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1525                 radeon_program_register_sequence(rdev,
1526                                                  hawaii_golden_common_registers,
1527                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1528                 radeon_program_register_sequence(rdev,
1529                                                  hawaii_golden_spm_registers,
1530                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1531                 break;
1532         default:
1533                 break;
1534         }
1535 }
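
/*
 * A minimal sketch of how the golden register tables above are consumed,
 * assuming radeon_program_register_sequence() walks {offset, and_mask,
 * or_mask} triplets and writes or_mask directly when and_mask is
 * 0xffffffff, doing a masked read-modify-write otherwise.  The example_*
 * names are hypothetical; the triplets are copied from the table above
 * purely for illustration.
 */
static const u32 example_extra_golden_registers[] =
{
        0xc318, 0xffffffff, 0x00020200, /* full-dword write */
        0x224, 0xc0000fff, 0x00000100   /* masked read-modify-write */
};

static void __maybe_unused example_program_extra_golden(struct radeon_device *rdev)
{
        radeon_program_register_sequence(rdev,
                                         example_extra_golden_registers,
                                         (const u32)ARRAY_SIZE(example_extra_golden_registers));
}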
1536
1537 /**
1538  * cik_get_xclk - get the xclk
1539  *
1540  * @rdev: radeon_device pointer
1541  *
1542  * Returns the reference clock used by the gfx engine
1543  * (CIK).
1544  */
1545 u32 cik_get_xclk(struct radeon_device *rdev)
1546 {
1547         u32 reference_clock = rdev->clock.spll.reference_freq;
1548
1549         if (rdev->flags & RADEON_IS_IGP) {
1550                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1551                         return reference_clock / 2;
1552         } else {
1553                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1554                         return reference_clock / 4;
1555         }
1556         return reference_clock;
1557 }
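
/*
 * A minimal usage sketch, assuming the returned reference clock is in
 * the usual 10 kHz units of rdev->clock.spll.reference_freq.  The
 * example_* helper name is hypothetical.
 */
static u32 __maybe_unused example_xclk_in_khz(struct radeon_device *rdev)
{
        /* assumed unit: 10 kHz per count */
        return cik_get_xclk(rdev) * 10;
}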
1558
1559 /**
1560  * cik_mm_rdoorbell - read a doorbell dword
1561  *
1562  * @rdev: radeon_device pointer
1563  * @index: doorbell index
1564  *
1565  * Returns the value in the doorbell aperture at the
1566  * requested doorbell index (CIK).
1567  */
1568 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1569 {
1570         if (index < rdev->doorbell.num_doorbells) {
1571                 return readl(rdev->doorbell.ptr + index);
1572         } else {
1573                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1574                 return 0;
1575         }
1576 }
1577
1578 /**
1579  * cik_mm_wdoorbell - write a doorbell dword
1580  *
1581  * @rdev: radeon_device pointer
1582  * @index: doorbell index
1583  * @v: value to write
1584  *
1585  * Writes @v to the doorbell aperture at the
1586  * requested doorbell index (CIK).
1587  */
1588 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1589 {
1590         if (index < rdev->doorbell.num_doorbells) {
1591                 writel(v, rdev->doorbell.ptr + index);
1592         } else {
1593                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1594         }
1595 }
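
/*
 * A minimal usage sketch for the doorbell helpers: a ring owning a
 * doorbell slot kicks the hardware by writing its write pointer to that
 * slot and can read the last written value back through the same
 * aperture.  The ring->doorbell_index field name is assumed from the
 * CIK compute ring setup; the example_* name is hypothetical.
 */
static void __maybe_unused example_kick_doorbell(struct radeon_device *rdev,
                                                 struct radeon_ring *ring)
{
        cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);

        if (cik_mm_rdoorbell(rdev, ring->doorbell_index) != ring->wptr)
                DRM_ERROR("doorbell readback mismatch\n");
}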
1596
1597 #define BONAIRE_IO_MC_REGS_SIZE 36
1598
1599 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1600 {
1601         {0x00000070, 0x04400000},
1602         {0x00000071, 0x80c01803},
1603         {0x00000072, 0x00004004},
1604         {0x00000073, 0x00000100},
1605         {0x00000074, 0x00ff0000},
1606         {0x00000075, 0x34000000},
1607         {0x00000076, 0x08000014},
1608         {0x00000077, 0x00cc08ec},
1609         {0x00000078, 0x00000400},
1610         {0x00000079, 0x00000000},
1611         {0x0000007a, 0x04090000},
1612         {0x0000007c, 0x00000000},
1613         {0x0000007e, 0x4408a8e8},
1614         {0x0000007f, 0x00000304},
1615         {0x00000080, 0x00000000},
1616         {0x00000082, 0x00000001},
1617         {0x00000083, 0x00000002},
1618         {0x00000084, 0xf3e4f400},
1619         {0x00000085, 0x052024e3},
1620         {0x00000087, 0x00000000},
1621         {0x00000088, 0x01000000},
1622         {0x0000008a, 0x1c0a0000},
1623         {0x0000008b, 0xff010000},
1624         {0x0000008d, 0xffffefff},
1625         {0x0000008e, 0xfff3efff},
1626         {0x0000008f, 0xfff3efbf},
1627         {0x00000092, 0xf7ffffff},
1628         {0x00000093, 0xffffff7f},
1629         {0x00000095, 0x00101101},
1630         {0x00000096, 0x00000fff},
1631         {0x00000097, 0x00116fff},
1632         {0x00000098, 0x60010000},
1633         {0x00000099, 0x10010000},
1634         {0x0000009a, 0x00006000},
1635         {0x0000009b, 0x00001000},
1636         {0x0000009f, 0x00b48000}
1637 };
1638
1639 #define HAWAII_IO_MC_REGS_SIZE 22
1640
1641 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1642 {
1643         {0x0000007d, 0x40000000},
1644         {0x0000007e, 0x40180304},
1645         {0x0000007f, 0x0000ff00},
1646         {0x00000081, 0x00000000},
1647         {0x00000083, 0x00000800},
1648         {0x00000086, 0x00000000},
1649         {0x00000087, 0x00000100},
1650         {0x00000088, 0x00020100},
1651         {0x00000089, 0x00000000},
1652         {0x0000008b, 0x00040000},
1653         {0x0000008c, 0x00000100},
1654         {0x0000008e, 0xff010000},
1655         {0x00000090, 0xffffefff},
1656         {0x00000091, 0xfff3efff},
1657         {0x00000092, 0xfff3efbf},
1658         {0x00000093, 0xf7ffffff},
1659         {0x00000094, 0xffffff7f},
1660         {0x00000095, 0x00000fff},
1661         {0x00000096, 0x00116fff},
1662         {0x00000097, 0x60010000},
1663         {0x00000098, 0x10010000},
1664         {0x0000009f, 0x00c79000}
1665 };
1666
1667
1668 /**
1669  * cik_srbm_select - select specific register instances
1670  *
1671  * @rdev: radeon_device pointer
1672  * @me: selected ME (micro engine)
1673  * @pipe: pipe
1674  * @queue: queue
1675  * @vmid: VMID
1676  *
1677  * Switches the currently active register instances.  Some
1678  * registers are instanced per VMID, others are instanced per
1679  * me/pipe/queue combination.
1680  */
1681 static void cik_srbm_select(struct radeon_device *rdev,
1682                             u32 me, u32 pipe, u32 queue, u32 vmid)
1683 {
1684         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1685                              MEID(me & 0x3) |
1686                              VMID(vmid & 0xf) |
1687                              QUEUEID(queue & 0x7));
1688         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1689 }
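
/*
 * A minimal usage sketch: callers of cik_srbm_select() bracket their
 * accesses - select the me/pipe/queue (and VMID) whose instanced
 * registers they need, touch them, then switch back to the default
 * instance.  Serialization via rdev->srbm_mutex is assumed here, and
 * the example_* name is hypothetical.
 */
static void __maybe_unused example_access_queue_regs(struct radeon_device *rdev,
                                                     u32 me, u32 pipe, u32 queue)
{
        mutex_lock(&rdev->srbm_mutex);
        cik_srbm_select(rdev, me, pipe, queue, 0);
        /* ... read or program the per-queue (HQD) registers here ... */
        cik_srbm_select(rdev, 0, 0, 0, 0);
        mutex_unlock(&rdev->srbm_mutex);
}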
1690
1691 /* ucode loading */
1692 /**
1693  * ci_mc_load_microcode - load MC ucode into the hw
1694  *
1695  * @rdev: radeon_device pointer
1696  *
1697  * Load the GDDR MC ucode into the hw (CIK).
1698  * Returns 0 on success, error on failure.
1699  */
1700 int ci_mc_load_microcode(struct radeon_device *rdev)
1701 {
1702         const __be32 *fw_data;
1703         u32 running, blackout = 0;
1704         u32 *io_mc_regs;
1705         int i, ucode_size, regs_size;
1706
1707         if (!rdev->mc_fw)
1708                 return -EINVAL;
1709
1710         switch (rdev->family) {
1711         case CHIP_BONAIRE:
1712                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1713                 ucode_size = CIK_MC_UCODE_SIZE;
1714                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1715                 break;
1716         case CHIP_HAWAII:
1717                 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1718                 ucode_size = HAWAII_MC_UCODE_SIZE;
1719                 regs_size = HAWAII_IO_MC_REGS_SIZE;
1720                 break;
1721         default:
1722                 return -EINVAL;
1723         }
1724
1725         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1726
1727         if (running == 0) {
1728                 if (running) {
1729                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1730                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1731                 }
1732
1733                 /* reset the engine and set to writable */
1734                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1735                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1736
1737                 /* load mc io regs */
1738                 for (i = 0; i < regs_size; i++) {
1739                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1740                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1741                 }
1742                 /* load the MC ucode */
1743                 fw_data = (const __be32 *)rdev->mc_fw->data;
1744                 for (i = 0; i < ucode_size; i++)
1745                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1746
1747                 /* put the engine back into the active state */
1748                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1749                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1750                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1751
1752                 /* wait for training to complete */
1753                 for (i = 0; i < rdev->usec_timeout; i++) {
1754                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1755                                 break;
1756                         udelay(1);
1757                 }
1758                 for (i = 0; i < rdev->usec_timeout; i++) {
1759                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1760                                 break;
1761                         udelay(1);
1762                 }
1763
1764                 if (running)
1765                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1766         }
1767
1768         return 0;
1769 }
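
/*
 * A minimal call-site sketch: ci_mc_load_microcode() returns success
 * without touching the hardware when the MC sequencer is already
 * running, so a resume path could call it unconditionally on discrete
 * boards and treat only a non-zero return as fatal.  The example_* name
 * is hypothetical.
 */
static int __maybe_unused example_load_mc_ucode(struct radeon_device *rdev)
{
        int r;

        if (rdev->flags & RADEON_IS_IGP)
                return 0;       /* APUs carry no MC ucode */

        r = ci_mc_load_microcode(rdev);
        if (r)
                DRM_ERROR("Failed to load MC firmware!\n");
        return r;
}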
1770
1771 /**
1772  * cik_init_microcode - load ucode images from disk
1773  *
1774  * @rdev: radeon_device pointer
1775  *
1776  * Use the firmware interface to load the ucode images into
1777  * the driver (not loaded into hw).
1778  * Returns 0 on success, error on failure.
1779  */
1780 static int cik_init_microcode(struct radeon_device *rdev)
1781 {
1782         const char *chip_name;
1783         size_t pfp_req_size, me_req_size, ce_req_size,
1784                 mec_req_size, rlc_req_size, mc_req_size = 0,
1785                 sdma_req_size, smc_req_size = 0;
1786         char fw_name[30];
1787         int err;
1788
1789         DRM_DEBUG("\n");
1790
1791         switch (rdev->family) {
1792         case CHIP_BONAIRE:
1793                 chip_name = "BONAIRE";
1794                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1795                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1796                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1797                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1798                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1799                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1800                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1801                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1802                 break;
1803         case CHIP_HAWAII:
1804                 chip_name = "HAWAII";
1805                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1806                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1807                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1808                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1809                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1810                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1811                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1812                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1813                 break;
1814         case CHIP_KAVERI:
1815                 chip_name = "KAVERI";
1816                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1817                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1818                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1819                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1820                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1821                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1822                 break;
1823         case CHIP_KABINI:
1824                 chip_name = "KABINI";
1825                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1826                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1827                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1828                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1829                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1830                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1831                 break;
1832         default: BUG();
1833         }
1834
1835         DRM_INFO("Loading %s Microcode\n", chip_name);
1836
1837         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1838         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1839         if (err)
1840                 goto out;
1841         if (rdev->pfp_fw->size != pfp_req_size) {
1842                 printk(KERN_ERR
1843                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1844                        rdev->pfp_fw->size, fw_name);
1845                 err = -EINVAL;
1846                 goto out;
1847         }
1848
1849         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1850         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1851         if (err)
1852                 goto out;
1853         if (rdev->me_fw->size != me_req_size) {
1854                 printk(KERN_ERR
1855                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1856                        rdev->me_fw->size, fw_name);
1857                 err = -EINVAL;
1858         }
1859
1860         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1861         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1862         if (err)
1863                 goto out;
1864         if (rdev->ce_fw->size != ce_req_size) {
1865                 printk(KERN_ERR
1866                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1867                        rdev->ce_fw->size, fw_name);
1868                 err = -EINVAL;
1869         }
1870
1871         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1872         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1873         if (err)
1874                 goto out;
1875         if (rdev->mec_fw->size != mec_req_size) {
1876                 printk(KERN_ERR
1877                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1878                        rdev->mec_fw->size, fw_name);
1879                 err = -EINVAL;
1880         }
1881
1882         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1883         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1884         if (err)
1885                 goto out;
1886         if (rdev->rlc_fw->size != rlc_req_size) {
1887                 printk(KERN_ERR
1888                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1889                        rdev->rlc_fw->size, fw_name);
1890                 err = -EINVAL;
1891         }
1892
1893         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1894         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1895         if (err)
1896                 goto out;
1897         if (rdev->sdma_fw->size != sdma_req_size) {
1898                 printk(KERN_ERR
1899                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1900                        rdev->sdma_fw->size, fw_name);
1901                 err = -EINVAL;
1902         }
1903
1904         /* No SMC, MC ucode on APUs */
1905         if (!(rdev->flags & RADEON_IS_IGP)) {
1906                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1907                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1908                 if (err)
1909                         goto out;
1910                 if (rdev->mc_fw->size != mc_req_size) {
1911                         printk(KERN_ERR
1912                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1913                                rdev->mc_fw->size, fw_name);
1914                         err = -EINVAL;
1915                 }
1916
1917                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1918                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1919                 if (err) {
1920                         printk(KERN_ERR
1921                                "smc: error loading firmware \"%s\"\n",
1922                                fw_name);
1923                         release_firmware(rdev->smc_fw);
1924                         rdev->smc_fw = NULL;
1925                         err = 0;
1926                 } else if (rdev->smc_fw->size != smc_req_size) {
1927                         printk(KERN_ERR
1928                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1929                                rdev->smc_fw->size, fw_name);
1930                         err = -EINVAL;
1931                 }
1932         }
1933
1934 out:
1935         if (err) {
1936                 if (err != -EINVAL)
1937                         printk(KERN_ERR
1938                                "cik_cp: Failed to load firmware \"%s\"\n",
1939                                fw_name);
1940                 release_firmware(rdev->pfp_fw);
1941                 rdev->pfp_fw = NULL;
1942                 release_firmware(rdev->me_fw);
1943                 rdev->me_fw = NULL;
1944                 release_firmware(rdev->ce_fw);
1945                 rdev->ce_fw = NULL;
                     release_firmware(rdev->mec_fw);
                     rdev->mec_fw = NULL;
1946                 release_firmware(rdev->rlc_fw);
1947                 rdev->rlc_fw = NULL;
                     release_firmware(rdev->sdma_fw);
                     rdev->sdma_fw = NULL;
1948                 release_firmware(rdev->mc_fw);
1949                 rdev->mc_fw = NULL;
1950                 release_firmware(rdev->smc_fw);
1951                 rdev->smc_fw = NULL;
1952         }
1953         return err;
1954 }
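
/*
 * A minimal call-site sketch: the ucode images only need to be fetched
 * once, and a missing SMC image is already treated as non-fatal inside
 * cik_init_microcode(), so only a non-zero return needs to abort
 * initialization.  The example_* name is hypothetical.
 */
static int __maybe_unused example_fetch_ucode_once(struct radeon_device *rdev)
{
        int r;

        if (!rdev->pfp_fw || !rdev->me_fw || !rdev->ce_fw ||
            !rdev->mec_fw || !rdev->rlc_fw || !rdev->sdma_fw) {
                r = cik_init_microcode(rdev);
                if (r) {
                        DRM_ERROR("Failed to load firmware!\n");
                        return r;
                }
        }
        return 0;
}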
1955
1956 /*
1957  * Core functions
1958  */
1959 /**
1960  * cik_tiling_mode_table_init - init the hw tiling table
1961  *
1962  * @rdev: radeon_device pointer
1963  *
1964  * Starting with SI, the tiling setup is done globally in a
1965  * set of 32 tiling modes.  Rather than selecting each set of
1966  * parameters per surface as on older asics, we just select
1967  * which index in the tiling table we want to use, and the
1968  * surface uses those parameters (CIK).
1969  */
1970 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1971 {
1972         const u32 num_tile_mode_states = 32;
1973         const u32 num_secondary_tile_mode_states = 16;
1974         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1975         u32 num_pipe_configs;
1976         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1977                 rdev->config.cik.max_shader_engines;
1978
1979         switch (rdev->config.cik.mem_row_size_in_kb) {
1980         case 1:
1981                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1982                 break;
1983         case 2:
1984         default:
1985                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1986                 break;
1987         case 4:
1988                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1989                 break;
1990         }
1991
1992         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1993         if (num_pipe_configs > 8)
1994                 num_pipe_configs = 16;
1995
1996         if (num_pipe_configs == 16) {
1997                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1998                         switch (reg_offset) {
1999                         case 0:
2000                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2003                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2004                                 break;
2005                         case 1:
2006                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2009                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2010                                 break;
2011                         case 2:
2012                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2015                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2016                                 break;
2017                         case 3:
2018                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2020                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2021                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2022                                 break;
2023                         case 4:
2024                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2025                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2027                                                  TILE_SPLIT(split_equal_to_row_size));
2028                                 break;
2029                         case 5:
2030                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2031                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2032                                 break;
2033                         case 6:
2034                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2035                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2036                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2037                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2038                                 break;
2039                         case 7:
2040                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2041                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2042                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2043                                                  TILE_SPLIT(split_equal_to_row_size));
2044                                 break;
2045                         case 8:
2046                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2047                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2048                                 break;
2049                         case 9:
2050                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2052                                 break;
2053                         case 10:
2054                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2055                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2057                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058                                 break;
2059                         case 11:
2060                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2062                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2063                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2064                                 break;
2065                         case 12:
2066                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2067                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2068                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2069                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2070                                 break;
2071                         case 13:
2072                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2073                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2074                                 break;
2075                         case 14:
2076                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2077                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080                                 break;
2081                         case 16:
2082                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2083                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2084                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2085                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086                                 break;
2087                         case 17:
2088                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2091                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092                                 break;
2093                         case 27:
2094                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2096                                 break;
2097                         case 28:
2098                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2101                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102                                 break;
2103                         case 29:
2104                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2105                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2106                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2107                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2108                                 break;
2109                         case 30:
2110                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2111                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2112                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2113                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2114                                 break;
2115                         default:
2116                                 gb_tile_moden = 0;
2117                                 break;
2118                         }
2119                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2120                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2121                 }
2122                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2123                         switch (reg_offset) {
2124                         case 0:
2125                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2127                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2128                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2129                                 break;
2130                         case 1:
2131                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2132                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2133                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2134                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2135                                 break;
2136                         case 2:
2137                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2138                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2139                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2140                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2141                                 break;
2142                         case 3:
2143                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2144                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2145                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2146                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2147                                 break;
2148                         case 4:
2149                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2151                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2152                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2153                                 break;
2154                         case 5:
2155                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2157                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2158                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2159                                 break;
2160                         case 6:
2161                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2165                                 break;
2166                         case 8:
2167                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2171                                 break;
2172                         case 9:
2173                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2177                                 break;
2178                         case 10:
2179                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2182                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2183                                 break;
2184                         case 11:
2185                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2189                                 break;
2190                         case 12:
2191                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2194                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2195                                 break;
2196                         case 13:
2197                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2200                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2201                                 break;
2202                         case 14:
2203                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2207                                 break;
2208                         default:
2209                                 gb_tile_moden = 0;
2210                                 break;
2211                         }
                             rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2212                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2213                 }
2214         } else if (num_pipe_configs == 8) {
2215                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2216                         switch (reg_offset) {
2217                         case 0:
2218                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2220                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2221                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2222                                 break;
2223                         case 1:
2224                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2226                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2227                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2228                                 break;
2229                         case 2:
2230                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2232                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2233                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2234                                 break;
2235                         case 3:
2236                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2238                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2239                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2240                                 break;
2241                         case 4:
2242                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2244                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2245                                                  TILE_SPLIT(split_equal_to_row_size));
2246                                 break;
2247                         case 5:
2248                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250                                 break;
2251                         case 6:
2252                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2253                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2254                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2255                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2256                                 break;
2257                         case 7:
2258                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2259                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2260                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2261                                                  TILE_SPLIT(split_equal_to_row_size));
2262                                 break;
2263                         case 8:
2264                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2265                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2266                                 break;
2267                         case 9:
2268                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2270                                 break;
2271                         case 10:
2272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2274                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2275                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276                                 break;
2277                         case 11:
2278                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2280                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2281                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282                                 break;
2283                         case 12:
2284                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2285                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2287                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288                                 break;
2289                         case 13:
2290                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2292                                 break;
2293                         case 14:
2294                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2296                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2297                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298                                 break;
2299                         case 16:
2300                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2301                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2303                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304                                 break;
2305                         case 17:
2306                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2307                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310                                 break;
2311                         case 27:
2312                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2314                                 break;
2315                         case 28:
2316                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2319                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320                                 break;
2321                         case 29:
2322                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326                                 break;
2327                         case 30:
2328                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2329                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2330                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332                                 break;
2333                         default:
2334                                 gb_tile_moden = 0;
2335                                 break;
2336                         }
2337                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2338                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2339                 }
2340                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2341                         switch (reg_offset) {
2342                         case 0:
2343                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2346                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2347                                 break;
2348                         case 1:
2349                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2353                                 break;
2354                         case 2:
2355                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2359                                 break;
2360                         case 3:
2361                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2365                                 break;
2366                         case 4:
2367                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2371                                 break;
2372                         case 5:
2373                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2375                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2376                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2377                                 break;
2378                         case 6:
2379                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2383                                 break;
2384                         case 8:
2385                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2386                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2387                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2389                                 break;
2390                         case 9:
2391                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2395                                 break;
2396                         case 10:
2397                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2399                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2401                                 break;
2402                         case 11:
2403                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2406                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2407                                 break;
2408                         case 12:
2409                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2411                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2412                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2413                                 break;
2414                         case 13:
2415                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2417                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2418                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2419                                 break;
2420                         case 14:
2421                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2425                                 break;
2426                         default:
2427                                 gb_tile_moden = 0;
2428                                 break;
2429                         }
2430                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2431                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2432                 }
2433         } else if (num_pipe_configs == 4) {
2434                 if (num_rbs == 4) {
2435                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2436                                 switch (reg_offset) {
2437                                 case 0:
2438                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2440                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2442                                         break;
2443                                 case 1:
2444                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2448                                         break;
2449                                 case 2:
2450                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2452                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2454                                         break;
2455                                 case 3:
2456                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2458                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2459                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2460                                         break;
2461                                 case 4:
2462                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2464                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2465                                                          TILE_SPLIT(split_equal_to_row_size));
2466                                         break;
2467                                 case 5:
2468                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2469                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470                                         break;
2471                                 case 6:
2472                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2473                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2474                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2475                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2476                                         break;
2477                                 case 7:
2478                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2479                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2480                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481                                                          TILE_SPLIT(split_equal_to_row_size));
2482                                         break;
2483                                 case 8:
2484                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2485                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2486                                         break;
2487                                 case 9:
2488                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2489                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2490                                         break;
2491                                 case 10:
2492                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496                                         break;
2497                                 case 11:
2498                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2500                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2501                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502                                         break;
2503                                 case 12:
2504                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2505                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2506                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2507                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2508                                         break;
2509                                 case 13:
2510                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2511                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2512                                         break;
2513                                 case 14:
2514                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2516                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2518                                         break;
2519                                 case 16:
2520                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2521                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2522                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2523                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524                                         break;
2525                                 case 17:
2526                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2527                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2529                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530                                         break;
2531                                 case 27:
2532                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2534                                         break;
2535                                 case 28:
2536                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2538                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540                                         break;
2541                                 case 29:
2542                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546                                         break;
2547                                 case 30:
2548                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2549                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2550                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552                                         break;
2553                                 default:
2554                                         gb_tile_moden = 0;
2555                                         break;
2556                                 }
2557                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2558                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2559                         }
2560                 } else if (num_rbs < 4) {
2561                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2562                                 switch (reg_offset) {
2563                                 case 0:
2564                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2568                                         break;
2569                                 case 1:
2570                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2572                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2573                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2574                                         break;
2575                                 case 2:
2576                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2579                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2580                                         break;
2581                                 case 3:
2582                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2584                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2585                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2586                                         break;
2587                                 case 4:
2588                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2591                                                          TILE_SPLIT(split_equal_to_row_size));
2592                                         break;
2593                                 case 5:
2594                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2595                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2596                                         break;
2597                                 case 6:
2598                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2600                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2601                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2602                                         break;
2603                                 case 7:
2604                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2605                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2606                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607                                                          TILE_SPLIT(split_equal_to_row_size));
2608                                         break;
2609                                 case 8:
2610                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2611                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
2612                                         break;
2613                                 case 9:
2614                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2615                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2616                                         break;
2617                                 case 10:
2618                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622                                         break;
2623                                 case 11:
2624                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628                                         break;
2629                                 case 12:
2630                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2633                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634                                         break;
2635                                 case 13:
2636                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2638                                         break;
2639                                 case 14:
2640                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2642                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2643                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644                                         break;
2645                                 case 16:
2646                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                                         break;
2651                                 case 17:
2652                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2654                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656                                         break;
2657                                 case 27:
2658                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2659                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2660                                         break;
2661                                 case 28:
2662                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2663                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2664                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666                                         break;
2667                                 case 29:
2668                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2669                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2670                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672                                         break;
2673                                 case 30:
2674                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2675                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678                                         break;
2679                                 default:
2680                                         gb_tile_moden = 0;
2681                                         break;
2682                                 }
2683                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2684                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2685                         }
2686                 }
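                     /* the macrotile (bank) table below applies regardless of the RB count */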
2687                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2688                         switch (reg_offset) {
2689                         case 0:
2690                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2694                                 break;
2695                         case 1:
2696                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2700                                 break;
2701                         case 2:
2702                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2705                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2706                                 break;
2707                         case 3:
2708                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2711                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2712                                 break;
2713                         case 4:
2714                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2717                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2718                                 break;
2719                         case 5:
2720                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2723                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2724                                 break;
2725                         case 6:
2726                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2729                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2730                                 break;
2731                         case 8:
2732                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2733                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2734                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2736                                 break;
2737                         case 9:
2738                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2739                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2742                                 break;
2743                         case 10:
2744                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2747                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2748                                 break;
2749                         case 11:
2750                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2752                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2754                                 break;
2755                         case 12:
2756                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2759                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2760                                 break;
2761                         case 13:
2762                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2766                                 break;
2767                         case 14:
2768                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2771                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2772                                 break;
2773                         default:
2774                                 gb_tile_moden = 0;
2775                                 break;
2776                         }
2777                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2778                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2779                 }
2780         } else if (num_pipe_configs == 2) {
2781                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2782                         switch (reg_offset) {
2783                         case 0:
2784                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2787                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2788                                 break;
2789                         case 1:
2790                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2792                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2793                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2794                                 break;
2795                         case 2:
2796                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2799                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2800                                 break;
2801                         case 3:
2802                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2805                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2806                                 break;
2807                         case 4:
2808                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2811                                                  TILE_SPLIT(split_equal_to_row_size));
2812                                 break;
2813                         case 5:
2814                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2816                                 break;
2817                         case 6:
2818                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2820                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2821                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2822                                 break;
2823                         case 7:
2824                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2825                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2826                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2827                                                  TILE_SPLIT(split_equal_to_row_size));
2828                                 break;
2829                         case 8:
2830                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2831                                 break;
2832                         case 9:
2833                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2835                                 break;
2836                         case 10:
2837                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2839                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2840                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2841                                 break;
2842                         case 11:
2843                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2844                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2845                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2846                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847                                 break;
2848                         case 12:
2849                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2850                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2851                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2852                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853                                 break;
2854                         case 13:
2855                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2856                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2857                                 break;
2858                         case 14:
2859                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2861                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2862                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863                                 break;
2864                         case 16:
2865                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2866                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2868                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869                                 break;
2870                         case 17:
2871                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2872                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2873                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2874                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875                                 break;
2876                         case 27:
2877                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2878                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2879                                 break;
2880                         case 28:
2881                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2882                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2883                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2884                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885                                 break;
2886                         case 29:
2887                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2890                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891                                 break;
2892                         case 30:
2893                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2894                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2896                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2897                                 break;
2898                         default:
2899                                 gb_tile_moden = 0;
2900                                 break;
2901                         }
2902                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2903                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2904                 }
2905                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2906                         switch (reg_offset) {
2907                         case 0:
2908                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2909                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2910                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2912                                 break;
2913                         case 1:
2914                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2915                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2916                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2918                                 break;
2919                         case 2:
2920                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2924                                 break;
2925                         case 3:
2926                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2928                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2930                                 break;
2931                         case 4:
2932                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2934                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2936                                 break;
2937                         case 5:
2938                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2941                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2942                                 break;
2943                         case 6:
2944                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2946                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2947                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2948                                 break;
2949                         case 8:
2950                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2951                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2952                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2954                                 break;
2955                         case 9:
2956                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2957                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2960                                 break;
2961                         case 10:
2962                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2963                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2966                                 break;
2967                         case 11:
2968                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2969                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2972                                 break;
2973                         case 12:
2974                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2976                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2977                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2978                                 break;
2979                         case 13:
2980                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2984                                 break;
2985                         case 14:
2986                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2988                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2989                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2990                                 break;
2991                         default:
2992                                 gb_tile_moden = 0;
2993                                 break;
2994                         }
2995                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2996                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2997                 }
2998         } else
2999                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3000 }
3001
3002 /**
3003  * cik_select_se_sh - select which SE, SH to address
3004  *
3005  * @rdev: radeon_device pointer
3006  * @se_num: shader engine to address
3007  * @sh_num: sh block to address
3008  *
3009  * Select which SE, SH combinations to address. Certain
3010  * registers are instanced per SE or SH.  0xffffffff means
3011  * broadcast to all SEs or SHs (CIK).
3012  */
3013 static void cik_select_se_sh(struct radeon_device *rdev,
3014                              u32 se_num, u32 sh_num)
3015 {
3016         u32 data = INSTANCE_BROADCAST_WRITES;
3017
3018         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3019                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3020         else if (se_num == 0xffffffff)
3021                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3022         else if (sh_num == 0xffffffff)
3023                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3024         else
3025                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3026         WREG32(GRBM_GFX_INDEX, data);
3027 }
3028
3029 /**
3030  * cik_create_bitmask - create a bitmask
3031  *
3032  * @bit_width: length of the mask
3033  *
3034  * create a variable length bit mask (CIK).
3035  * Returns the bitmask.
3036  */
3037 static u32 cik_create_bitmask(u32 bit_width)
3038 {
3039         u32 i, mask = 0;
3040
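             /* shift in bit_width ones: equivalent to (1 << bit_width) - 1 */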
3041         for (i = 0; i < bit_width; i++) {
3042                 mask <<= 1;
3043                 mask |= 1;
3044         }
3045         return mask;
3046 }
3047
3048 /**
3049  * cik_get_rb_disabled - compute the bitmask of disabled RBs
3050  *
3051  * @rdev: radeon_device pointer
3052  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3053  * @sh_per_se: number of SH blocks per SE for the asic
3055  *
3056  * Calculates the bitmask of disabled RBs (CIK).
3057  * Returns the disabled RB bitmask.
3058  */
3059 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3060                               u32 max_rb_num_per_se,
3061                               u32 sh_per_se)
3062 {
3063         u32 data, mask;
3064
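             /* merge the RB disable bits from the CC and GC_USER registers */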
3065         data = RREG32(CC_RB_BACKEND_DISABLE);
3066         if (data & 1)
3067                 data &= BACKEND_DISABLE_MASK;
3068         else
3069                 data = 0;
3070         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3071
3072         data >>= BACKEND_DISABLE_SHIFT;
3073
3074         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3075
3076         return data & mask;
3077 }
3078
3079 /**
3080  * cik_setup_rb - setup the RBs on the asic
3081  *
3082  * @rdev: radeon_device pointer
3083  * @se_num: number of SEs (shader engines) for the asic
3084  * @sh_per_se: number of SH blocks per SE for the asic
3085  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3086  *
3087  * Configures per-SE/SH RB registers (CIK).
3088  */
3089 static void cik_setup_rb(struct radeon_device *rdev,
3090                          u32 se_num, u32 sh_per_se,
3091                          u32 max_rb_num_per_se)
3092 {
3093         int i, j;
3094         u32 data, mask;
3095         u32 disabled_rbs = 0;
3096         u32 enabled_rbs = 0;
3097
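             /* collect the disabled RB bits reported by each SE/SH */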
3098         for (i = 0; i < se_num; i++) {
3099                 for (j = 0; j < sh_per_se; j++) {
3100                         cik_select_se_sh(rdev, i, j);
3101                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3102                         if (rdev->family == CHIP_HAWAII)
3103                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3104                         else
3105                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3106                 }
3107         }
3108         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3109
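             /* invert the disabled bitmap to get the mask of enabled RBs */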
3110         mask = 1;
3111         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3112                 if (!(disabled_rbs & mask))
3113                         enabled_rbs |= mask;
3114                 mask <<= 1;
3115         }
3116
3117         rdev->config.cik.backend_enable_mask = enabled_rbs;
3118
3119         for (i = 0; i < se_num; i++) {
3120                 cik_select_se_sh(rdev, i, 0xffffffff);
3121                 data = 0;
3122                 for (j = 0; j < sh_per_se; j++) {
3123                         switch (enabled_rbs & 3) {
3124                         case 0:
3125                                 if (j == 0)
3126                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3127                                 else
3128                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3129                                 break;
3130                         case 1:
3131                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3132                                 break;
3133                         case 2:
3134                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3135                                 break;
3136                         case 3:
3137                         default:
3138                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3139                                 break;
3140                         }
3141                         enabled_rbs >>= 2;
3142                 }
3143                 WREG32(PA_SC_RASTER_CONFIG, data);
3144         }
3145         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3146 }
3147
3148 /**
3149  * cik_gpu_init - setup the 3D engine
3150  *
3151  * @rdev: radeon_device pointer
3152  *
3153  * Configures the 3D engine and tiling configuration
3154  * registers so that the 3D engine is usable.
3155  */
3156 static void cik_gpu_init(struct radeon_device *rdev)
3157 {
3158         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3159         u32 mc_shared_chmap, mc_arb_ramcfg;
3160         u32 hdp_host_path_cntl;
3161         u32 tmp;
3162         int i, j;
3163
3164         switch (rdev->family) {
3165         case CHIP_BONAIRE:
3166                 rdev->config.cik.max_shader_engines = 2;
3167                 rdev->config.cik.max_tile_pipes = 4;
3168                 rdev->config.cik.max_cu_per_sh = 7;
3169                 rdev->config.cik.max_sh_per_se = 1;
3170                 rdev->config.cik.max_backends_per_se = 2;
3171                 rdev->config.cik.max_texture_channel_caches = 4;
3172                 rdev->config.cik.max_gprs = 256;
3173                 rdev->config.cik.max_gs_threads = 32;
3174                 rdev->config.cik.max_hw_contexts = 8;
3175
3176                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3177                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3178                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3179                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3180                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3181                 break;
3182         case CHIP_HAWAII:
3183                 rdev->config.cik.max_shader_engines = 4;
3184                 rdev->config.cik.max_tile_pipes = 16;
3185                 rdev->config.cik.max_cu_per_sh = 11;
3186                 rdev->config.cik.max_sh_per_se = 1;
3187                 rdev->config.cik.max_backends_per_se = 4;
3188                 rdev->config.cik.max_texture_channel_caches = 16;
3189                 rdev->config.cik.max_gprs = 256;
3190                 rdev->config.cik.max_gs_threads = 32;
3191                 rdev->config.cik.max_hw_contexts = 8;
3192
3193                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3194                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3195                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3196                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3197                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3198                 break;
3199         case CHIP_KAVERI:
3200                 rdev->config.cik.max_shader_engines = 1;
3201                 rdev->config.cik.max_tile_pipes = 4;
3202                 if ((rdev->pdev->device == 0x1304) ||
3203                     (rdev->pdev->device == 0x1305) ||
3204                     (rdev->pdev->device == 0x130C) ||
3205                     (rdev->pdev->device == 0x130F) ||
3206                     (rdev->pdev->device == 0x1310) ||
3207                     (rdev->pdev->device == 0x1311) ||
3208                     (rdev->pdev->device == 0x131C)) {
3209                         rdev->config.cik.max_cu_per_sh = 8;
3210                         rdev->config.cik.max_backends_per_se = 2;
3211                 } else if ((rdev->pdev->device == 0x1309) ||
3212                            (rdev->pdev->device == 0x130A) ||
3213                            (rdev->pdev->device == 0x130D) ||
3214                            (rdev->pdev->device == 0x1313) ||
3215                            (rdev->pdev->device == 0x131D)) {
3216                         rdev->config.cik.max_cu_per_sh = 6;
3217                         rdev->config.cik.max_backends_per_se = 2;
3218                 } else if ((rdev->pdev->device == 0x1306) ||
3219                            (rdev->pdev->device == 0x1307) ||
3220                            (rdev->pdev->device == 0x130B) ||
3221                            (rdev->pdev->device == 0x130E) ||
3222                            (rdev->pdev->device == 0x1315) ||
3223                            (rdev->pdev->device == 0x131B)) {
3224                         rdev->config.cik.max_cu_per_sh = 4;
3225                         rdev->config.cik.max_backends_per_se = 1;
3226                 } else {
3227                         rdev->config.cik.max_cu_per_sh = 3;
3228                         rdev->config.cik.max_backends_per_se = 1;
3229                 }
3230                 rdev->config.cik.max_sh_per_se = 1;
3231                 rdev->config.cik.max_texture_channel_caches = 4;
3232                 rdev->config.cik.max_gprs = 256;
3233                 rdev->config.cik.max_gs_threads = 16;
3234                 rdev->config.cik.max_hw_contexts = 8;
3235
3236                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3237                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3238                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3239                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3240                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3241                 break;
3242         case CHIP_KABINI:
3243         default:
3244                 rdev->config.cik.max_shader_engines = 1;
3245                 rdev->config.cik.max_tile_pipes = 2;
3246                 rdev->config.cik.max_cu_per_sh = 2;
3247                 rdev->config.cik.max_sh_per_se = 1;
3248                 rdev->config.cik.max_backends_per_se = 1;
3249                 rdev->config.cik.max_texture_channel_caches = 2;
3250                 rdev->config.cik.max_gprs = 256;
3251                 rdev->config.cik.max_gs_threads = 16;
3252                 rdev->config.cik.max_hw_contexts = 8;
3253
3254                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3255                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3256                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3257                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3258                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3259                 break;
3260         }
3261
3262         /* Initialize HDP */
3263         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3264                 WREG32((0x2c14 + j), 0x00000000);
3265                 WREG32((0x2c18 + j), 0x00000000);
3266                 WREG32((0x2c1c + j), 0x00000000);
3267                 WREG32((0x2c20 + j), 0x00000000);
3268                 WREG32((0x2c24 + j), 0x00000000);
3269         }
3270
3271         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3272
3273         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3274
3275         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3276         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3277
3278         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3279         rdev->config.cik.mem_max_burst_length_bytes = 256;
3280         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3281         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3282         if (rdev->config.cik.mem_row_size_in_kb > 4)
3283                 rdev->config.cik.mem_row_size_in_kb = 4;
3284         /* XXX use MC settings? */
3285         rdev->config.cik.shader_engine_tile_size = 32;
3286         rdev->config.cik.num_gpus = 1;
3287         rdev->config.cik.multi_gpu_tile_size = 64;
3288
3289         /* fix up row size */
3290         gb_addr_config &= ~ROW_SIZE_MASK;
3291         switch (rdev->config.cik.mem_row_size_in_kb) {
3292         case 1:
3293         default:
3294                 gb_addr_config |= ROW_SIZE(0);
3295                 break;
3296         case 2:
3297                 gb_addr_config |= ROW_SIZE(1);
3298                 break;
3299         case 4:
3300                 gb_addr_config |= ROW_SIZE(2);
3301                 break;
3302         }
3303
3304         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3305          * not have bank info, so create a custom tiling dword.
3306          * bits 3:0   num_pipes
3307          * bits 7:4   num_banks
3308          * bits 11:8  group_size
3309          * bits 15:12 row_size
3310          */
3311         rdev->config.cik.tile_config = 0;
3312         switch (rdev->config.cik.num_tile_pipes) {
3313         case 1:
3314                 rdev->config.cik.tile_config |= (0 << 0);
3315                 break;
3316         case 2:
3317                 rdev->config.cik.tile_config |= (1 << 0);
3318                 break;
3319         case 4:
3320                 rdev->config.cik.tile_config |= (2 << 0);
3321                 break;
3322         case 8:
3323         default:
3324                 /* XXX what about 12? */
3325                 rdev->config.cik.tile_config |= (3 << 0);
3326                 break;
3327         }
3328         rdev->config.cik.tile_config |=
3329                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3330         rdev->config.cik.tile_config |=
3331                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3332         rdev->config.cik.tile_config |=
3333                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3334
3335         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3336         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3337         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3338         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3339         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3340         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3341         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3342         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3343
3344         cik_tiling_mode_table_init(rdev);
3345
3346         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3347                      rdev->config.cik.max_sh_per_se,
3348                      rdev->config.cik.max_backends_per_se);
3349
3350         /* set HW defaults for 3D engine */
3351         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3352
3353         WREG32(SX_DEBUG_1, 0x20);
3354
3355         WREG32(TA_CNTL_AUX, 0x00010000);
3356
3357         tmp = RREG32(SPI_CONFIG_CNTL);
3358         tmp |= 0x03000000;
3359         WREG32(SPI_CONFIG_CNTL, tmp);
3360
3361         WREG32(SQ_CONFIG, 1);
3362
3363         WREG32(DB_DEBUG, 0);
3364
3365         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3366         tmp |= 0x00000400;
3367         WREG32(DB_DEBUG2, tmp);
3368
3369         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3370         tmp |= 0x00020200;
3371         WREG32(DB_DEBUG3, tmp);
3372
3373         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3374         tmp |= 0x00018208;
3375         WREG32(CB_HW_CONTROL, tmp);
3376
3377         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3378
3379         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3380                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3381                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3382                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3383
3384         WREG32(VGT_NUM_INSTANCES, 1);
3385
3386         WREG32(CP_PERFMON_CNTL, 0);
3387
3388         WREG32(SQ_CONFIG, 0);
3389
3390         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3391                                           FORCE_EOV_MAX_REZ_CNT(255)));
3392
3393         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3394                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3395
3396         WREG32(VGT_GS_VERTEX_REUSE, 16);
3397         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3398
3399         tmp = RREG32(HDP_MISC_CNTL);
3400         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3401         WREG32(HDP_MISC_CNTL, tmp);
3402
3403         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3404         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3405
3406         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3407         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3408
3409         udelay(50);
3410 }
3411
3412 /*
3413  * GPU scratch register helper functions.
3414  */
3415 /**
3416  * cik_scratch_init - setup driver info for CP scratch regs
3417  *
3418  * @rdev: radeon_device pointer
3419  *
3420  * Set up the number and offset of the CP scratch registers.
3421  * NOTE: use of CP scratch registers is a legacy interface and
3422  * is not used by default on newer asics (r6xx+).  On newer asics,
3423  * memory buffers are used for fences rather than scratch regs.
3424  */
3425 static void cik_scratch_init(struct radeon_device *rdev)
3426 {
3427         int i;
3428
3429         rdev->scratch.num_reg = 7;
3430         rdev->scratch.reg_base = SCRATCH_REG0;
3431         for (i = 0; i < rdev->scratch.num_reg; i++) {
3432                 rdev->scratch.free[i] = true;
3433                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3434         }
3435 }
3436
3437 /**
3438  * cik_ring_test - basic gfx ring test
3439  *
3440  * @rdev: radeon_device pointer
3441  * @ring: radeon_ring structure holding ring information
3442  *
3443  * Allocate a scratch register and write to it using the gfx ring (CIK).
3444  * Provides a basic gfx ring test to verify that the ring is working.
3445  * Used by cik_cp_gfx_resume().
3446  * Returns 0 on success, error on failure.
3447  */
3448 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3449 {
3450         uint32_t scratch;
3451         uint32_t tmp = 0;
3452         unsigned i;
3453         int r;
3454
3455         r = radeon_scratch_get(rdev, &scratch);
3456         if (r) {
3457                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3458                 return r;
3459         }
3460         WREG32(scratch, 0xCAFEDEAD);
3461         r = radeon_ring_lock(rdev, ring, 3);
3462         if (r) {
3463                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3464                 radeon_scratch_free(rdev, scratch);
3465                 return r;
3466         }
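             /* emit a write of 0xDEADBEEF to the scratch reg, then poll the reg below until the CP has executed it */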
3467         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3468         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3469         radeon_ring_write(ring, 0xDEADBEEF);
3470         radeon_ring_unlock_commit(rdev, ring);
3471
3472         for (i = 0; i < rdev->usec_timeout; i++) {
3473                 tmp = RREG32(scratch);
3474                 if (tmp == 0xDEADBEEF)
3475                         break;
3476                 DRM_UDELAY(1);
3477         }
3478         if (i < rdev->usec_timeout) {
3479                 DRM_INFO("ring test on %d succeeded in %u usecs\n", ring->idx, i);
3480         } else {
3481                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3482                           ring->idx, scratch, tmp);
3483                 r = -EINVAL;
3484         }
3485         radeon_scratch_free(rdev, scratch);
3486         return r;
3487 }
3488
3489 /**
3490  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3491  *
3492  * @rdev: radeon_device pointer
3493  * @fence: radeon fence object
3494  *
3495  * Emits a fence sequence number on the gfx ring and flushes
3496  * GPU caches.
3497  */
3498 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3499                              struct radeon_fence *fence)
3500 {
3501         struct radeon_ring *ring = &rdev->ring[fence->ring];
3502         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3503
3504         /* EVENT_WRITE_EOP - flush caches, send int */
3505         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3506         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3507                                  EOP_TC_ACTION_EN |
3508                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3509                                  EVENT_INDEX(5)));
3510         radeon_ring_write(ring, addr & 0xfffffffc);
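             /* DATA_SEL(1)/INT_SEL(2) presumably select a 32-bit data write and an interrupt on write confirm */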
3511         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3512         radeon_ring_write(ring, fence->seq);
3513         radeon_ring_write(ring, 0);
3514         /* HDP flush */
3515         /* We should be using the new WAIT_REG_MEM special op packet here
3516          * but it causes the CP to hang
3517          */
3518         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3519         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3520                                  WRITE_DATA_DST_SEL(0)));
3521         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3522         radeon_ring_write(ring, 0);
3523         radeon_ring_write(ring, 0);
3524 }
3525
3526 /**
3527  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3528  *
3529  * @rdev: radeon_device pointer
3530  * @fence: radeon fence object
3531  *
3532  * Emits a fence sequence number on the compute ring and flushes
3533  * GPU caches.
3534  */
3535 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3536                                  struct radeon_fence *fence)
3537 {
3538         struct radeon_ring *ring = &rdev->ring[fence->ring];
3539         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3540
3541         /* RELEASE_MEM - flush caches, send int */
3542         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3543         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3544                                  EOP_TC_ACTION_EN |
3545                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3546                                  EVENT_INDEX(5)));
3547         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3548         radeon_ring_write(ring, addr & 0xfffffffc);
3549         radeon_ring_write(ring, upper_32_bits(addr));
3550         radeon_ring_write(ring, fence->seq);
3551         radeon_ring_write(ring, 0);
3552         /* HDP flush */
3553         /* We should be using the new WAIT_REG_MEM special op packet here
3554          * but it causes the CP to hang
3555          */
3556         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3557         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3558                                  WRITE_DATA_DST_SEL(0)));
3559         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3560         radeon_ring_write(ring, 0);
3561         radeon_ring_write(ring, 0);
3562 }
3563
3564 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3565                              struct radeon_ring *ring,
3566                              struct radeon_semaphore *semaphore,
3567                              bool emit_wait)
3568 {
3569         uint64_t addr = semaphore->gpu_addr;
3570         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3571
3572         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3573         radeon_ring_write(ring, addr & 0xffffffff);
3574         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3575
3576         return true;
3577 }
3578
3579 /**
3580  * cik_copy_cpdma - copy pages using the CP DMA engine
3581  *
3582  * @rdev: radeon_device pointer
3583  * @src_offset: src GPU address
3584  * @dst_offset: dst GPU address
3585  * @num_gpu_pages: number of GPU pages to xfer
3586  * @fence: radeon fence object
3587  *
3588  * Copy GPU pages using the CP DMA engine (CIK+).
3589  * Used by the radeon ttm implementation to move pages if
3590  * registered as the asic copy callback.
3591  */
3592 int cik_copy_cpdma(struct radeon_device *rdev,
3593                    uint64_t src_offset, uint64_t dst_offset,
3594                    unsigned num_gpu_pages,
3595                    struct radeon_fence **fence)
3596 {
3597         struct radeon_semaphore *sem = NULL;
3598         int ring_index = rdev->asic->copy.blit_ring_index;
3599         struct radeon_ring *ring = &rdev->ring[ring_index];
3600         u32 size_in_bytes, cur_size_in_bytes, control;
3601         int i, num_loops;
3602         int r = 0;
3603
3604         r = radeon_semaphore_create(rdev, &sem);
3605         if (r) {
3606                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3607                 return r;
3608         }
3609
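             /* each PACKET3_DMA_DATA copy below is 7 dwords and moves at most 0x1fffff bytes;
              * the extra 18 dwords reserved here cover the semaphore sync and fence emission
              */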
3610         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3611         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3612         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3613         if (r) {
3614                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3615                 radeon_semaphore_free(rdev, &sem, NULL);
3616                 return r;
3617         }
3618
3619         radeon_semaphore_sync_to(sem, *fence);
3620         radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3621
3622         for (i = 0; i < num_loops; i++) {
3623                 cur_size_in_bytes = size_in_bytes;
3624                 if (cur_size_in_bytes > 0x1fffff)
3625                         cur_size_in_bytes = 0x1fffff;
3626                 size_in_bytes -= cur_size_in_bytes;
3627                 control = 0;
3628                 if (size_in_bytes == 0)
3629                         control |= PACKET3_DMA_DATA_CP_SYNC;
3630                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3631                 radeon_ring_write(ring, control);
3632                 radeon_ring_write(ring, lower_32_bits(src_offset));
3633                 radeon_ring_write(ring, upper_32_bits(src_offset));
3634                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3635                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3636                 radeon_ring_write(ring, cur_size_in_bytes);
3637                 src_offset += cur_size_in_bytes;
3638                 dst_offset += cur_size_in_bytes;
3639         }
3640
3641         r = radeon_fence_emit(rdev, fence, ring->idx);
3642         if (r) {
3643                 radeon_ring_unlock_undo(rdev, ring);
                     radeon_semaphore_free(rdev, &sem, NULL);
3644                 return r;
3645         }
3646
3647         radeon_ring_unlock_commit(rdev, ring);
3648         radeon_semaphore_free(rdev, &sem, *fence);
3649
3650         return r;
3651 }
3652
3653 /*
3654  * IB stuff
3655  */
3656 /**
3657  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3658  *
3659  * @rdev: radeon_device pointer
3660  * @ib: radeon indirect buffer object
3661  *
3662  * Emits a DE (drawing engine) or CE (constant engine) IB
3663  * on the gfx ring.  IBs are usually generated by userspace
3664  * acceleration drivers and submitted to the kernel for
3665  * scheduling on the ring.  This function schedules the IB
3666  * on the gfx ring for execution by the GPU.
3667  */
3668 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3669 {
3670         struct radeon_ring *ring = &rdev->ring[ib->ring];
3671         u32 header, control = INDIRECT_BUFFER_VALID;
3672
3673         if (ib->is_const_ib) {
3674                 /* set switch buffer packet before const IB */
3675                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3676                 radeon_ring_write(ring, 0);
3677
3678                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3679         } else {
3680                 u32 next_rptr;
3681                 if (ring->rptr_save_reg) {
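                             /* 3 dwords for the SET_UCONFIG_REG write below plus 4 for the IB packet itself */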
3682                         next_rptr = ring->wptr + 3 + 4;
3683                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3684                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3685                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3686                         radeon_ring_write(ring, next_rptr);
3687                 } else if (rdev->wb.enabled) {
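                             /* 5 dwords for the WRITE_DATA packet below plus 4 for the IB packet itself */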
3688                         next_rptr = ring->wptr + 5 + 4;
3689                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3690                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3691                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3692                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3693                         radeon_ring_write(ring, next_rptr);
3694                 }
3695
3696                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3697         }
3698
3699         control |= ib->length_dw |
3700                 (ib->vm ? (ib->vm->id << 24) : 0);
3701
3702         radeon_ring_write(ring, header);
3703         radeon_ring_write(ring,
3704 #ifdef __BIG_ENDIAN
3705                           (2 << 0) |
3706 #endif
3707                           (ib->gpu_addr & 0xFFFFFFFC));
3708         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3709         radeon_ring_write(ring, control);
3710 }
3711
3712 /**
3713  * cik_ib_test - basic gfx ring IB test
3714  *
3715  * @rdev: radeon_device pointer
3716  * @ring: radeon_ring structure holding ring information
3717  *
3718  * Allocate an IB and execute it on the gfx ring (CIK).
3719  * Provides a basic gfx ring test to verify that IBs are working.
3720  * Returns 0 on success, error on failure.
3721  */
3722 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3723 {
3724         struct radeon_ib ib;
3725         uint32_t scratch;
3726         uint32_t tmp = 0;
3727         unsigned i;
3728         int r;
3729
3730         r = radeon_scratch_get(rdev, &scratch);
3731         if (r) {
3732                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3733                 return r;
3734         }
3735         WREG32(scratch, 0xCAFEDEAD);
3736         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3737         if (r) {
3738                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3739                 radeon_scratch_free(rdev, scratch);
3740                 return r;
3741         }
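             /* the IB simply writes 0xDEADBEEF to the scratch reg via SET_UCONFIG_REG */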
3742         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3743         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3744         ib.ptr[2] = 0xDEADBEEF;
3745         ib.length_dw = 3;
3746         r = radeon_ib_schedule(rdev, &ib, NULL);
3747         if (r) {
3748                 radeon_scratch_free(rdev, scratch);
3749                 radeon_ib_free(rdev, &ib);
3750                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3751                 return r;
3752         }
3753         r = radeon_fence_wait(ib.fence, false);
3754         if (r) {
3755                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3756                 radeon_scratch_free(rdev, scratch);
3757                 radeon_ib_free(rdev, &ib);
3758                 return r;
3759         }
3760         for (i = 0; i < rdev->usec_timeout; i++) {
3761                 tmp = RREG32(scratch);
3762                 if (tmp == 0xDEADBEEF)
3763                         break;
3764                 DRM_UDELAY(1);
3765         }
3766         if (i < rdev->usec_timeout) {
3767                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3768         } else {
3769                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3770                           scratch, tmp);
3771                 r = -EINVAL;
3772         }
3773         radeon_scratch_free(rdev, scratch);
3774         radeon_ib_free(rdev, &ib);
3775         return r;
3776 }
3777
3778 /*
3779  * CP.
3780  * On CIK, gfx and compute now have independent command processors.
3781  *
3782  * GFX
3783  * Gfx consists of a single ring and can process both gfx jobs and
3784  * compute jobs.  The gfx CP consists of three microengines (ME):
3785  * PFP - Pre-Fetch Parser
3786  * ME - Micro Engine
3787  * CE - Constant Engine
3788  * The PFP and ME make up what is considered the Drawing Engine (DE).
3789  * The CE is an asynchronous engine used for updating buffer descriptors
3790  * used by the DE so that they can be loaded into cache in parallel
3791  * while the DE is processing state update packets.
3792  *
3793  * Compute
3794  * The compute CP consists of two microengines (ME):
3795  * MEC1 - Compute MicroEngine 1
3796  * MEC2 - Compute MicroEngine 2
3797  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3798  * The queues are exposed to userspace and are programmed directly
3799  * by the compute runtime.
3800  */
3801 /**
3802  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3803  *
3804  * @rdev: radeon_device pointer
3805  * @enable: enable or disable the MEs
3806  *
3807  * Halts or unhalts the gfx MEs.
3808  */
3809 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3810 {
3811         if (enable)
3812                 WREG32(CP_ME_CNTL, 0);
3813         else {
3814                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3815                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3816         }
3817         udelay(50);
3818 }
3819
3820 /**
3821  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3822  *
3823  * @rdev: radeon_device pointer
3824  *
3825  * Loads the gfx PFP, ME, and CE ucode.
3826  * Returns 0 for success, -EINVAL if the ucode is not available.
3827  */
3828 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3829 {
3830         const __be32 *fw_data;
3831         int i;
3832
3833         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3834                 return -EINVAL;
3835
3836         cik_cp_gfx_enable(rdev, false);
3837
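             /* each ucode stream is loaded by resetting the write address, streaming the big-endian words, then resetting the address again */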
3838         /* PFP */
3839         fw_data = (const __be32 *)rdev->pfp_fw->data;
3840         WREG32(CP_PFP_UCODE_ADDR, 0);
3841         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3842                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3843         WREG32(CP_PFP_UCODE_ADDR, 0);
3844
3845         /* CE */
3846         fw_data = (const __be32 *)rdev->ce_fw->data;
3847         WREG32(CP_CE_UCODE_ADDR, 0);
3848         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3849                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3850         WREG32(CP_CE_UCODE_ADDR, 0);
3851
3852         /* ME */
3853         fw_data = (const __be32 *)rdev->me_fw->data;
3854         WREG32(CP_ME_RAM_WADDR, 0);
3855         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3856                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3857         WREG32(CP_ME_RAM_WADDR, 0);
3858
3859         WREG32(CP_PFP_UCODE_ADDR, 0);
3860         WREG32(CP_CE_UCODE_ADDR, 0);
3861         WREG32(CP_ME_RAM_WADDR, 0);
3862         WREG32(CP_ME_RAM_RADDR, 0);
3863         return 0;
3864 }
3865
3866 /**
3867  * cik_cp_gfx_start - start the gfx ring
3868  *
3869  * @rdev: radeon_device pointer
3870  *
3871  * Enables the ring and loads the clear state context and other
3872  * packets required to init the ring.
3873  * Returns 0 for success, error for failure.
3874  */
3875 static int cik_cp_gfx_start(struct radeon_device *rdev)
3876 {
3877         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3878         int r, i;
3879
3880         /* init the CP */
3881         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3882         WREG32(CP_ENDIAN_SWAP, 0);
3883         WREG32(CP_DEVICE_ID, 1);
3884
3885         cik_cp_gfx_enable(rdev, true);
3886
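             /* reserve room for the clear state dwords plus the 17 dwords of fixed init packets emitted below */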
3887         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3888         if (r) {
3889                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3890                 return r;
3891         }
3892
3893         /* init the CE partitions.  CE only used for gfx on CIK */
3894         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3895         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3896         radeon_ring_write(ring, 0xc000);
3897         radeon_ring_write(ring, 0xc000);
3898
3899         /* setup clear context state */
3900         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3901         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3902
3903         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3904         radeon_ring_write(ring, 0x80000000);
3905         radeon_ring_write(ring, 0x80000000);
3906
3907         for (i = 0; i < cik_default_size; i++)
3908                 radeon_ring_write(ring, cik_default_state[i]);
3909
3910         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3911         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3912
3913         /* set clear context state */
3914         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3915         radeon_ring_write(ring, 0);
3916
3917         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3918         radeon_ring_write(ring, 0x00000316);
3919         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3920         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3921
3922         radeon_ring_unlock_commit(rdev, ring);
3923
3924         return 0;
3925 }
3926
3927 /**
3928  * cik_cp_gfx_fini - stop the gfx ring
3929  *
3930  * @rdev: radeon_device pointer
3931  *
3932  * Stop the gfx ring and tear down the driver ring
3933  * info.
3934  */
3935 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3936 {
3937         cik_cp_gfx_enable(rdev, false);
3938         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3939 }
3940
3941 /**
3942  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3943  *
3944  * @rdev: radeon_device pointer
3945  *
3946  * Program the location and size of the gfx ring buffer
3947  * and test it to make sure it's working.
3948  * Returns 0 for success, error for failure.
3949  */
3950 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3951 {
3952         struct radeon_ring *ring;
3953         u32 tmp;
3954         u32 rb_bufsz;
3955         u64 rb_addr;
3956         int r;
3957
3958         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3959         if (rdev->family != CHIP_HAWAII)
3960                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3961
3962         /* Set the write pointer delay */
3963         WREG32(CP_RB_WPTR_DELAY, 0);
3964
3965         /* set the RB to use vmid 0 */
3966         WREG32(CP_RB_VMID, 0);
3967
3968         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3969
3970         /* ring 0 - compute and gfx */
3971         /* Set ring buffer size */
3972         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3973         rb_bufsz = order_base_2(ring->ring_size / 8);
3974         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3975 #ifdef __BIG_ENDIAN
3976         tmp |= BUF_SWAP_32BIT;
3977 #endif
3978         WREG32(CP_RB0_CNTL, tmp);
3979
3980         /* Initialize the ring buffer's read and write pointers */
3981         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3982         ring->wptr = 0;
3983         WREG32(CP_RB0_WPTR, ring->wptr);
3984
3985         /* set the wb address whether it's enabled or not */
3986         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3987         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3988
3989         /* scratch register shadowing is no longer supported */
3990         WREG32(SCRATCH_UMSK, 0);
3991
3992         if (!rdev->wb.enabled)
3993                 tmp |= RB_NO_UPDATE;
3994
3995         mdelay(1);
3996         WREG32(CP_RB0_CNTL, tmp);
3997
3998         rb_addr = ring->gpu_addr >> 8;
3999         WREG32(CP_RB0_BASE, rb_addr);
4000         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4001
4002         ring->rptr = RREG32(CP_RB0_RPTR);
4003
4004         /* start the ring */
4005         cik_cp_gfx_start(rdev);
4006         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4007         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4008         if (r) {
4009                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4010                 return r;
4011         }
4012         return 0;
4013 }
4014
4015 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4016                      struct radeon_ring *ring)
4017 {
4018         u32 rptr;
4019
4020         if (rdev->wb.enabled)
4021                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4022         else
4023                 rptr = RREG32(CP_RB0_RPTR);
4024
4025         return rptr;
4026 }
4027
4028 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4029                      struct radeon_ring *ring)
4030 {
4031         u32 wptr;
4032
4033         wptr = RREG32(CP_RB0_WPTR);
4034
4035         return wptr;
4036 }
4037
4038 void cik_gfx_set_wptr(struct radeon_device *rdev,
4039                       struct radeon_ring *ring)
4040 {
4041         WREG32(CP_RB0_WPTR, ring->wptr);
4042         (void)RREG32(CP_RB0_WPTR);
4043 }
4044
4045 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4046                          struct radeon_ring *ring)
4047 {
4048         u32 rptr;
4049
4050         if (rdev->wb.enabled) {
4051                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4052         } else {
4053                 mutex_lock(&rdev->srbm_mutex);
4054                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4055                 rptr = RREG32(CP_HQD_PQ_RPTR);
4056                 cik_srbm_select(rdev, 0, 0, 0, 0);
4057                 mutex_unlock(&rdev->srbm_mutex);
4058         }
4059
4060         return rptr;
4061 }
4062
4063 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4064                          struct radeon_ring *ring)
4065 {
4066         u32 wptr;
4067
4068         if (rdev->wb.enabled) {
4069                 /* XXX check if swapping is necessary on BE */
4070                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4071         } else {
4072                 mutex_lock(&rdev->srbm_mutex);
4073                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4074                 wptr = RREG32(CP_HQD_PQ_WPTR);
4075                 cik_srbm_select(rdev, 0, 0, 0, 0);
4076                 mutex_unlock(&rdev->srbm_mutex);
4077         }
4078
4079         return wptr;
4080 }
4081
4082 void cik_compute_set_wptr(struct radeon_device *rdev,
4083                           struct radeon_ring *ring)
4084 {
4085         /* XXX check if swapping is necessary on BE */
4086         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4087         WDOORBELL32(ring->doorbell_index, ring->wptr);
4088 }
4089
4090 /**
4091  * cik_cp_compute_enable - enable/disable the compute CP MEs
4092  *
4093  * @rdev: radeon_device pointer
4094  * @enable: enable or disable the MEs
4095  *
4096  * Halts or unhalts the compute MEs.
4097  */
4098 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4099 {
4100         if (enable)
4101                 WREG32(CP_MEC_CNTL, 0);
4102         else
4103                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4104         udelay(50);
4105 }
4106
4107 /**
4108  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4109  *
4110  * @rdev: radeon_device pointer
4111  *
4112  * Loads the compute MEC1&2 ucode.
4113  * Returns 0 for success, -EINVAL if the ucode is not available.
4114  */
4115 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4116 {
4117         const __be32 *fw_data;
4118         int i;
4119
4120         if (!rdev->mec_fw)
4121                 return -EINVAL;
4122
4123         cik_cp_compute_enable(rdev, false);
4124
4125         /* MEC1 */
4126         fw_data = (const __be32 *)rdev->mec_fw->data;
4127         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4128         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4129                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4130         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4131
4132         if (rdev->family == CHIP_KAVERI) {
4133                 /* MEC2 */
4134                 fw_data = (const __be32 *)rdev->mec_fw->data;
4135                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4136                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4137                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4138                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4139         }
4140
4141         return 0;
4142 }
4143
4144 /**
4145  * cik_cp_compute_start - start the compute queues
4146  *
4147  * @rdev: radeon_device pointer
4148  *
4149  * Enable the compute queues.
4150  * Returns 0 for success, error for failure.
4151  */
4152 static int cik_cp_compute_start(struct radeon_device *rdev)
4153 {
4154         cik_cp_compute_enable(rdev, true);
4155
4156         return 0;
4157 }
4158
4159 /**
4160  * cik_cp_compute_fini - stop the compute queues
4161  *
4162  * @rdev: radeon_device pointer
4163  *
4164  * Stop the compute queues and tear down the driver queue
4165  * info.
4166  */
4167 static void cik_cp_compute_fini(struct radeon_device *rdev)
4168 {
4169         int i, idx, r;
4170
4171         cik_cp_compute_enable(rdev, false);
4172
4173         for (i = 0; i < 2; i++) {
4174                 if (i == 0)
4175                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4176                 else
4177                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4178
4179                 if (rdev->ring[idx].mqd_obj) {
4180                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4181                         if (unlikely(r != 0))
4182                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4183
4184                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4185                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4186
4187                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4188                         rdev->ring[idx].mqd_obj = NULL;
4189                 }
4190         }
4191 }
4192
4193 static void cik_mec_fini(struct radeon_device *rdev)
4194 {
4195         int r;
4196
4197         if (rdev->mec.hpd_eop_obj) {
4198                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4199                 if (unlikely(r != 0))
4200                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4201                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4202                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4203
4204                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4205                 rdev->mec.hpd_eop_obj = NULL;
4206         }
4207 }
4208
4209 #define MEC_HPD_SIZE 2048
4210
4211 static int cik_mec_init(struct radeon_device *rdev)
4212 {
4213         int r;
4214         u32 *hpd;
4215
4216         /*
4217          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4218          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4219          */
4220         if (rdev->family == CHIP_KAVERI)
4221                 rdev->mec.num_mec = 2;
4222         else
4223                 rdev->mec.num_mec = 1;
4224         rdev->mec.num_pipe = 4;
4225         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4226
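             /* allocate one MEC_HPD_SIZE * 2 byte HPD EOP region per pipe; cik_cp_compute_resume() indexes into this buffer by pipe */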
4227         if (rdev->mec.hpd_eop_obj == NULL) {
4228                 r = radeon_bo_create(rdev,
4229                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4230                                      PAGE_SIZE, true,
4231                                      RADEON_GEM_DOMAIN_GTT, NULL,
4232                                      &rdev->mec.hpd_eop_obj);
4233                 if (r) {
4234                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4235                         return r;
4236                 }
4237         }
4238
4239         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4240         if (unlikely(r != 0)) {
4241                 cik_mec_fini(rdev);
4242                 return r;
4243         }
4244         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4245                           &rdev->mec.hpd_eop_gpu_addr);
4246         if (r) {
4247                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4248                 cik_mec_fini(rdev);
4249                 return r;
4250         }
4251         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4252         if (r) {
4253                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4254                 cik_mec_fini(rdev);
4255                 return r;
4256         }
4257
4258         /* clear memory.  Not sure if this is required or not */
4259         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4260
4261         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4262         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4263
4264         return 0;
4265 }
4266
4267 struct hqd_registers
4268 {
4269         u32 cp_mqd_base_addr;
4270         u32 cp_mqd_base_addr_hi;
4271         u32 cp_hqd_active;
4272         u32 cp_hqd_vmid;
4273         u32 cp_hqd_persistent_state;
4274         u32 cp_hqd_pipe_priority;
4275         u32 cp_hqd_queue_priority;
4276         u32 cp_hqd_quantum;
4277         u32 cp_hqd_pq_base;
4278         u32 cp_hqd_pq_base_hi;
4279         u32 cp_hqd_pq_rptr;
4280         u32 cp_hqd_pq_rptr_report_addr;
4281         u32 cp_hqd_pq_rptr_report_addr_hi;
4282         u32 cp_hqd_pq_wptr_poll_addr;
4283         u32 cp_hqd_pq_wptr_poll_addr_hi;
4284         u32 cp_hqd_pq_doorbell_control;
4285         u32 cp_hqd_pq_wptr;
4286         u32 cp_hqd_pq_control;
4287         u32 cp_hqd_ib_base_addr;
4288         u32 cp_hqd_ib_base_addr_hi;
4289         u32 cp_hqd_ib_rptr;
4290         u32 cp_hqd_ib_control;
4291         u32 cp_hqd_iq_timer;
4292         u32 cp_hqd_iq_rptr;
4293         u32 cp_hqd_dequeue_request;
4294         u32 cp_hqd_dma_offload;
4295         u32 cp_hqd_sema_cmd;
4296         u32 cp_hqd_msg_type;
4297         u32 cp_hqd_atomic0_preop_lo;
4298         u32 cp_hqd_atomic0_preop_hi;
4299         u32 cp_hqd_atomic1_preop_lo;
4300         u32 cp_hqd_atomic1_preop_hi;
4301         u32 cp_hqd_hq_scheduler0;
4302         u32 cp_hqd_hq_scheduler1;
4303         u32 cp_mqd_control;
4304 };
4305
4306 struct bonaire_mqd
4307 {
4308         u32 header;
4309         u32 dispatch_initiator;
4310         u32 dimensions[3];
4311         u32 start_idx[3];
4312         u32 num_threads[3];
4313         u32 pipeline_stat_enable;
4314         u32 perf_counter_enable;
4315         u32 pgm[2];
4316         u32 tba[2];
4317         u32 tma[2];
4318         u32 pgm_rsrc[2];
4319         u32 vmid;
4320         u32 resource_limits;
4321         u32 static_thread_mgmt01[2];
4322         u32 tmp_ring_size;
4323         u32 static_thread_mgmt23[2];
4324         u32 restart[3];
4325         u32 thread_trace_enable;
4326         u32 reserved1;
4327         u32 user_data[16];
4328         u32 vgtcs_invoke_count[2];
4329         struct hqd_registers queue_state;
4330         u32 dequeue_cntr;
4331         u32 interrupt_queue[64];
4332 };
4333
4334 /**
4335  * cik_cp_compute_resume - setup the compute queue registers
4336  *
4337  * @rdev: radeon_device pointer
4338  *
4339  * Program the compute queues and test them to make sure they
4340  * are working.
4341  * Returns 0 for success, error for failure.
4342  */
4343 static int cik_cp_compute_resume(struct radeon_device *rdev)
4344 {
4345         int r, i, j, idx;
4346         u32 tmp;
4347         bool use_doorbell = true;
4348         u64 hqd_gpu_addr;
4349         u64 mqd_gpu_addr;
4350         u64 eop_gpu_addr;
4351         u64 wb_gpu_addr;
4352         u32 *buf;
4353         struct bonaire_mqd *mqd;
4354
4355         r = cik_cp_compute_start(rdev);
4356         if (r)
4357                 return r;
4358
4359         /* fix up chicken bits */
4360         tmp = RREG32(CP_CPF_DEBUG);
4361         tmp |= (1 << 23);
4362         WREG32(CP_CPF_DEBUG, tmp);
4363
4364         /* init the pipes */
4365         mutex_lock(&rdev->srbm_mutex);
4366         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4367                 int me = (i < 4) ? 1 : 2;
4368                 int pipe = (i < 4) ? i : (i - 4);
4369
4370                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4371
4372                 cik_srbm_select(rdev, me, pipe, 0, 0);
4373
4374                 /* write the EOP addr */
4375                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4376                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4377
4378                 /* set the VMID assigned */
4379                 WREG32(CP_HPD_EOP_VMID, 0);
4380
4381                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4382                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4383                 tmp &= ~EOP_SIZE_MASK;
4384                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4385                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4386         }
4387         cik_srbm_select(rdev, 0, 0, 0, 0);
4388         mutex_unlock(&rdev->srbm_mutex);
4389
4390         /* init the queues.  Just two for now. */
4391         for (i = 0; i < 2; i++) {
4392                 if (i == 0)
4393                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4394                 else
4395                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4396
4397                 if (rdev->ring[idx].mqd_obj == NULL) {
4398                         r = radeon_bo_create(rdev,
4399                                              sizeof(struct bonaire_mqd),
4400                                              PAGE_SIZE, true,
4401                                              RADEON_GEM_DOMAIN_GTT, NULL,
4402                                              &rdev->ring[idx].mqd_obj);
4403                         if (r) {
4404                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4405                                 return r;
4406                         }
4407                 }
4408
4409                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4410                 if (unlikely(r != 0)) {
4411                         cik_cp_compute_fini(rdev);
4412                         return r;
4413                 }
4414                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4415                                   &mqd_gpu_addr);
4416                 if (r) {
4417                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4418                         cik_cp_compute_fini(rdev);
4419                         return r;
4420                 }
4421                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4422                 if (r) {
4423                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4424                         cik_cp_compute_fini(rdev);
4425                         return r;
4426                 }
4427
4428                 /* init the mqd struct */
4429                 memset(buf, 0, sizeof(struct bonaire_mqd));
4430
4431                 mqd = (struct bonaire_mqd *)buf;
4432                 mqd->header = 0xC0310800;
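                     /* the static thread mgmt masks appear to select which CUs the queue may use; all ones allows every CU */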
4433                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4434                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4435                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4436                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4437
4438                 mutex_lock(&rdev->srbm_mutex);
4439                 cik_srbm_select(rdev, rdev->ring[idx].me,
4440                                 rdev->ring[idx].pipe,
4441                                 rdev->ring[idx].queue, 0);
4442
4443                 /* disable wptr polling */
4444                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4445                 tmp &= ~WPTR_POLL_EN;
4446                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4447
4448                 /* enable doorbell? */
4449                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4450                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4451                 if (use_doorbell)
4452                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4453                 else
4454                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4455                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4456                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4457
4458                 /* disable the queue if it's active */
4459                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4460                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4461                 mqd->queue_state.cp_hqd_pq_wptr = 0;
4462                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4463                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4464                         for (j = 0; j < rdev->usec_timeout; j++) {
4465                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4466                                         break;
4467                                 udelay(1);
4468                         }
4469                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4470                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4471                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4472                 }
4473
4474                 /* set the pointer to the MQD */
4475                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4476                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4477                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4478                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4479                 /* set MQD vmid to 0 */
4480                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4481                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4482                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4483
4484                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4485                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4486                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4487                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4488                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4489                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4490
4491                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4492                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4493                 mqd->queue_state.cp_hqd_pq_control &=
4494                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4495
4496                 mqd->queue_state.cp_hqd_pq_control |=
4497                         order_base_2(rdev->ring[idx].ring_size / 8);
4498                 mqd->queue_state.cp_hqd_pq_control |=
4499                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4500 #ifdef __BIG_ENDIAN
4501                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4502 #endif
4503                 mqd->queue_state.cp_hqd_pq_control &=
4504                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4505                 mqd->queue_state.cp_hqd_pq_control |=
4506                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4507                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4508
4509                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4510                 if (i == 0)
4511                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4512                 else
4513                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4514                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4515                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4516                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4517                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4518                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4519
4520                 /* set the wb address whether it's enabled or not */
4521                 if (i == 0)
4522                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4523                 else
4524                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4525                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4526                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4527                         upper_32_bits(wb_gpu_addr) & 0xffff;
4528                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4529                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4530                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4531                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4532
4533                 /* enable the doorbell if requested */
4534                 if (use_doorbell) {
4535                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4536                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4537                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4538                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4539                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4540                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4541                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4542                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4543
4544                 } else {
4545                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4546                 }
4547                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4548                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4549
4550                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4551                 rdev->ring[idx].wptr = 0;
4552                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4553                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4554                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4555                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4556
4557                 /* set the vmid for the queue */
4558                 mqd->queue_state.cp_hqd_vmid = 0;
4559                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4560
4561                 /* activate the queue */
4562                 mqd->queue_state.cp_hqd_active = 1;
4563                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4564
4565                 cik_srbm_select(rdev, 0, 0, 0, 0);
4566                 mutex_unlock(&rdev->srbm_mutex);
4567
4568                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4569                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4570
4571                 rdev->ring[idx].ready = true;
4572                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4573                 if (r)
4574                         rdev->ring[idx].ready = false;
4575         }
4576
4577         return 0;
4578 }
4579
4580 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4581 {
4582         cik_cp_gfx_enable(rdev, enable);
4583         cik_cp_compute_enable(rdev, enable);
4584 }
4585
4586 static int cik_cp_load_microcode(struct radeon_device *rdev)
4587 {
4588         int r;
4589
4590         r = cik_cp_gfx_load_microcode(rdev);
4591         if (r)
4592                 return r;
4593         r = cik_cp_compute_load_microcode(rdev);
4594         if (r)
4595                 return r;
4596
4597         return 0;
4598 }
4599
4600 static void cik_cp_fini(struct radeon_device *rdev)
4601 {
4602         cik_cp_gfx_fini(rdev);
4603         cik_cp_compute_fini(rdev);
4604 }
4605
4606 static int cik_cp_resume(struct radeon_device *rdev)
4607 {
4608         int r;
4609
4610         cik_enable_gui_idle_interrupt(rdev, false);
4611
4612         r = cik_cp_load_microcode(rdev);
4613         if (r)
4614                 return r;
4615
4616         r = cik_cp_gfx_resume(rdev);
4617         if (r)
4618                 return r;
4619         r = cik_cp_compute_resume(rdev);
4620         if (r)
4621                 return r;
4622
4623         cik_enable_gui_idle_interrupt(rdev, true);
4624
4625         return 0;
4626 }
4627
4628 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4629 {
4630         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4631                 RREG32(GRBM_STATUS));
4632         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4633                 RREG32(GRBM_STATUS2));
4634         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4635                 RREG32(GRBM_STATUS_SE0));
4636         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4637                 RREG32(GRBM_STATUS_SE1));
4638         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4639                 RREG32(GRBM_STATUS_SE2));
4640         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4641                 RREG32(GRBM_STATUS_SE3));
4642         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4643                 RREG32(SRBM_STATUS));
4644         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4645                 RREG32(SRBM_STATUS2));
4646         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4647                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4648         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4649                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4650         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4651         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4652                  RREG32(CP_STALLED_STAT1));
4653         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4654                  RREG32(CP_STALLED_STAT2));
4655         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4656                  RREG32(CP_STALLED_STAT3));
4657         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4658                  RREG32(CP_CPF_BUSY_STAT));
4659         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4660                  RREG32(CP_CPF_STALLED_STAT1));
4661         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4662         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4663         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4664                  RREG32(CP_CPC_STALLED_STAT1));
4665         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4666 }
4667
4668 /**
4669  * cik_gpu_check_soft_reset - check which blocks are busy
4670  *
4671  * @rdev: radeon_device pointer
4672  *
4673  * Check which blocks are busy and return the relevant reset
4674  * mask to be used by cik_gpu_soft_reset().
4675  * Returns a mask of the blocks to be reset.
4676  */
4677 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4678 {
4679         u32 reset_mask = 0;
4680         u32 tmp;
4681
4682         /* GRBM_STATUS */
4683         tmp = RREG32(GRBM_STATUS);
4684         if (tmp & (PA_BUSY | SC_BUSY |
4685                    BCI_BUSY | SX_BUSY |
4686                    TA_BUSY | VGT_BUSY |
4687                    DB_BUSY | CB_BUSY |
4688                    GDS_BUSY | SPI_BUSY |
4689                    IA_BUSY | IA_BUSY_NO_DMA))
4690                 reset_mask |= RADEON_RESET_GFX;
4691
4692         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4693                 reset_mask |= RADEON_RESET_CP;
4694
4695         /* GRBM_STATUS2 */
4696         tmp = RREG32(GRBM_STATUS2);
4697         if (tmp & RLC_BUSY)
4698                 reset_mask |= RADEON_RESET_RLC;
4699
4700         /* SDMA0_STATUS_REG */
4701         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4702         if (!(tmp & SDMA_IDLE))
4703                 reset_mask |= RADEON_RESET_DMA;
4704
4705         /* SDMA1_STATUS_REG */
4706         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4707         if (!(tmp & SDMA_IDLE))
4708                 reset_mask |= RADEON_RESET_DMA1;
4709
4710         /* SRBM_STATUS2 */
4711         tmp = RREG32(SRBM_STATUS2);
4712         if (tmp & SDMA_BUSY)
4713                 reset_mask |= RADEON_RESET_DMA;
4714
4715         if (tmp & SDMA1_BUSY)
4716                 reset_mask |= RADEON_RESET_DMA1;
4717
4718         /* SRBM_STATUS */
4719         tmp = RREG32(SRBM_STATUS);
4720
4721         if (tmp & IH_BUSY)
4722                 reset_mask |= RADEON_RESET_IH;
4723
4724         if (tmp & SEM_BUSY)
4725                 reset_mask |= RADEON_RESET_SEM;
4726
4727         if (tmp & GRBM_RQ_PENDING)
4728                 reset_mask |= RADEON_RESET_GRBM;
4729
4730         if (tmp & VMC_BUSY)
4731                 reset_mask |= RADEON_RESET_VMC;
4732
4733         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4734                    MCC_BUSY | MCD_BUSY))
4735                 reset_mask |= RADEON_RESET_MC;
4736
4737         if (evergreen_is_display_hung(rdev))
4738                 reset_mask |= RADEON_RESET_DISPLAY;
4739
4740         /* Skip MC reset as it's most likely not hung, just busy */
4741         if (reset_mask & RADEON_RESET_MC) {
4742                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4743                 reset_mask &= ~RADEON_RESET_MC;
4744         }
4745
4746         return reset_mask;
4747 }
4748
4749 /**
4750  * cik_gpu_soft_reset - soft reset GPU
4751  *
4752  * @rdev: radeon_device pointer
4753  * @reset_mask: mask of which blocks to reset
4754  *
4755  * Soft reset the blocks specified in @reset_mask.
4756  */
4757 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4758 {
4759         struct evergreen_mc_save save;
4760         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4761         u32 tmp;
4762
4763         if (reset_mask == 0)
4764                 return;
4765
4766         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4767
4768         cik_print_gpu_status_regs(rdev);
4769         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4770                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4771         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4772                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4773
4774         /* disable CG/PG */
4775         cik_fini_pg(rdev);
4776         cik_fini_cg(rdev);
4777
4778         /* stop the rlc */
4779         cik_rlc_stop(rdev);
4780
4781         /* Disable GFX parsing/prefetching */
4782         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4783
4784         /* Disable MEC parsing/prefetching */
4785         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4786
4787         if (reset_mask & RADEON_RESET_DMA) {
4788                 /* sdma0 */
4789                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4790                 tmp |= SDMA_HALT;
4791                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4792         }
4793         if (reset_mask & RADEON_RESET_DMA1) {
4794                 /* sdma1 */
4795                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4796                 tmp |= SDMA_HALT;
4797                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4798         }
4799
4800         evergreen_mc_stop(rdev, &save);
4801         if (evergreen_mc_wait_for_idle(rdev)) {
4802                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4803         }
4804
4805         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4806                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4807
4808         if (reset_mask & RADEON_RESET_CP) {
4809                 grbm_soft_reset |= SOFT_RESET_CP;
4810
4811                 srbm_soft_reset |= SOFT_RESET_GRBM;
4812         }
4813
4814         if (reset_mask & RADEON_RESET_DMA)
4815                 srbm_soft_reset |= SOFT_RESET_SDMA;
4816
4817         if (reset_mask & RADEON_RESET_DMA1)
4818                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4819
4820         if (reset_mask & RADEON_RESET_DISPLAY)
4821                 srbm_soft_reset |= SOFT_RESET_DC;
4822
4823         if (reset_mask & RADEON_RESET_RLC)
4824                 grbm_soft_reset |= SOFT_RESET_RLC;
4825
4826         if (reset_mask & RADEON_RESET_SEM)
4827                 srbm_soft_reset |= SOFT_RESET_SEM;
4828
4829         if (reset_mask & RADEON_RESET_IH)
4830                 srbm_soft_reset |= SOFT_RESET_IH;
4831
4832         if (reset_mask & RADEON_RESET_GRBM)
4833                 srbm_soft_reset |= SOFT_RESET_GRBM;
4834
4835         if (reset_mask & RADEON_RESET_VMC)
4836                 srbm_soft_reset |= SOFT_RESET_VMC;
4837
4838         if (!(rdev->flags & RADEON_IS_IGP)) {
4839                 if (reset_mask & RADEON_RESET_MC)
4840                         srbm_soft_reset |= SOFT_RESET_MC;
4841         }
4842
4843         if (grbm_soft_reset) {
4844                 tmp = RREG32(GRBM_SOFT_RESET);
4845                 tmp |= grbm_soft_reset;
4846                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4847                 WREG32(GRBM_SOFT_RESET, tmp);
4848                 tmp = RREG32(GRBM_SOFT_RESET);
4849
4850                 udelay(50);
4851
4852                 tmp &= ~grbm_soft_reset;
4853                 WREG32(GRBM_SOFT_RESET, tmp);
4854                 tmp = RREG32(GRBM_SOFT_RESET);
4855         }
4856
4857         if (srbm_soft_reset) {
4858                 tmp = RREG32(SRBM_SOFT_RESET);
4859                 tmp |= srbm_soft_reset;
4860                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4861                 WREG32(SRBM_SOFT_RESET, tmp);
4862                 tmp = RREG32(SRBM_SOFT_RESET);
4863
4864                 udelay(50);
4865
4866                 tmp &= ~srbm_soft_reset;
4867                 WREG32(SRBM_SOFT_RESET, tmp);
4868                 tmp = RREG32(SRBM_SOFT_RESET);
4869         }
4870
4871         /* Wait a little for things to settle down */
4872         udelay(50);
4873
4874         evergreen_mc_resume(rdev, &save);
4875         udelay(50);
4876
4877         cik_print_gpu_status_regs(rdev);
4878 }
4879
4880 struct kv_reset_save_regs {
4881         u32 gmcon_reng_execute;
4882         u32 gmcon_misc;
4883         u32 gmcon_misc3;
4884 };
4885
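/* kv_save_regs_for_reset - stash the GMCON reset engine registers on APUs
 * before a PCI config reset and quiesce the automatic reset-engine
 * execution and stutter mode; kv_restore_regs_for_reset() puts them back.
 */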
4886 static void kv_save_regs_for_reset(struct radeon_device *rdev,
4887                                    struct kv_reset_save_regs *save)
4888 {
4889         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
4890         save->gmcon_misc = RREG32(GMCON_MISC);
4891         save->gmcon_misc3 = RREG32(GMCON_MISC3);
4892
4893         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
4894         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
4895                                                 STCTRL_STUTTER_EN));
4896 }
4897
4898 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4899                                       struct kv_reset_save_regs *save)
4900 {
4901         int i;
4902
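        /* Replay the GMC power gating FSM programming: each
         * GMCON_PGFSM_CONFIG write selects a target and the
         * GMCON_PGFSM_WRITE values that follow reload its state
         * (hw-specific magic values).
         */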
4903         WREG32(GMCON_PGFSM_WRITE, 0);
4904         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
4905
4906         for (i = 0; i < 5; i++)
4907                 WREG32(GMCON_PGFSM_WRITE, 0);
4908
4909         WREG32(GMCON_PGFSM_WRITE, 0);
4910         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
4911
4912         for (i = 0; i < 5; i++)
4913                 WREG32(GMCON_PGFSM_WRITE, 0);
4914
4915         WREG32(GMCON_PGFSM_WRITE, 0x210000);
4916         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
4917
4918         for (i = 0; i < 5; i++)
4919                 WREG32(GMCON_PGFSM_WRITE, 0);
4920
4921         WREG32(GMCON_PGFSM_WRITE, 0x21003);
4922         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
4923
4924         for (i = 0; i < 5; i++)
4925                 WREG32(GMCON_PGFSM_WRITE, 0);
4926
4927         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
4928         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
4929
4930         for (i = 0; i < 5; i++)
4931                 WREG32(GMCON_PGFSM_WRITE, 0);
4932
4933         WREG32(GMCON_PGFSM_WRITE, 0);
4934         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
4935
4936         for (i = 0; i < 5; i++)
4937                 WREG32(GMCON_PGFSM_WRITE, 0);
4938
4939         WREG32(GMCON_PGFSM_WRITE, 0x420000);
4940         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
4941
4942         for (i = 0; i < 5; i++)
4943                 WREG32(GMCON_PGFSM_WRITE, 0);
4944
4945         WREG32(GMCON_PGFSM_WRITE, 0x120202);
4946         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
4947
4948         for (i = 0; i < 5; i++)
4949                 WREG32(GMCON_PGFSM_WRITE, 0);
4950
4951         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
4952         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
4953
4954         for (i = 0; i < 5; i++)
4955                 WREG32(GMCON_PGFSM_WRITE, 0);
4956
4957         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
4958         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
4959
4960         for (i = 0; i < 5; i++)
4961                 WREG32(GMCON_PGFSM_WRITE, 0);
4962
4963         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
4964         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
4965
4966         WREG32(GMCON_MISC3, save->gmcon_misc3);
4967         WREG32(GMCON_MISC, save->gmcon_misc);
4968         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
4969 }
4970
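/**
 * cik_gpu_pci_config_reset - reset the GPU via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halt the CP, MEC and SDMA engines, stop memory access, and reset the
 * asic through the pci config space as a bigger hammer when the soft
 * reset did not recover the GPU (CIK).
 */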
4971 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
4972 {
4973         struct evergreen_mc_save save;
4974         struct kv_reset_save_regs kv_save = { 0 };
4975         u32 tmp, i;
4976
4977         dev_info(rdev->dev, "GPU pci config reset\n");
4978
4979         /* disable dpm? */
4980
4981         /* disable cg/pg */
4982         cik_fini_pg(rdev);
4983         cik_fini_cg(rdev);
4984
4985         /* Disable GFX parsing/prefetching */
4986         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4987
4988         /* Disable MEC parsing/prefetching */
4989         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4990
4991         /* sdma0 */
4992         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4993         tmp |= SDMA_HALT;
4994         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4995         /* sdma1 */
4996         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4997         tmp |= SDMA_HALT;
4998         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4999         /* XXX other engines? */
5000
5001         /* halt the rlc, disable cp internal ints */
5002         cik_rlc_stop(rdev);
5003
5004         udelay(50);
5005
5006         /* disable mem access */
5007         evergreen_mc_stop(rdev, &save);
5008         if (evergreen_mc_wait_for_idle(rdev)) {
5009                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5010         }
5011
5012         if (rdev->flags & RADEON_IS_IGP)
5013                 kv_save_regs_for_reset(rdev, &kv_save);
5014
5015         /* disable BM */
5016         pci_clear_master(rdev->pdev);
5017         /* reset */
5018         radeon_pci_config_reset(rdev);
5019
5020         udelay(100);
5021
5022         /* wait for asic to come out of reset */
5023         for (i = 0; i < rdev->usec_timeout; i++) {
5024                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5025                         break;
5026                 udelay(1);
5027         }
5028
5029         /* does asic init need to be run first??? */
5030         if (rdev->flags & RADEON_IS_IGP)
5031                 kv_restore_regs_for_reset(rdev, &kv_save);
5032 }
5033
5034 /**
5035  * cik_asic_reset - soft reset GPU
5036  *
5037  * @rdev: radeon_device pointer
5038  *
5039  * Look up which blocks are hung and attempt
5040  * to reset them.
5041  * Returns 0 for success.
5042  */
5043 int cik_asic_reset(struct radeon_device *rdev)
5044 {
5045         u32 reset_mask;
5046
5047         reset_mask = cik_gpu_check_soft_reset(rdev);
5048
5049         if (reset_mask)
5050                 r600_set_bios_scratch_engine_hung(rdev, true);
5051
5052         /* try soft reset */
5053         cik_gpu_soft_reset(rdev, reset_mask);
5054
5055         reset_mask = cik_gpu_check_soft_reset(rdev);
5056
5057         /* try pci config reset */
5058         if (reset_mask && radeon_hard_reset)
5059                 cik_gpu_pci_config_reset(rdev);
5060
5061         reset_mask = cik_gpu_check_soft_reset(rdev);
5062
5063         if (!reset_mask)
5064                 r600_set_bios_scratch_engine_hung(rdev, false);
5065
5066         return 0;
5067 }
5068
5069 /**
5070  * cik_gfx_is_lockup - check if the 3D engine is locked up
5071  *
5072  * @rdev: radeon_device pointer
5073  * @ring: radeon_ring structure holding ring information
5074  *
5075  * Check if the 3D engine is locked up (CIK).
5076  * Returns true if the engine is locked, false if not.
5077  */
5078 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5079 {
5080         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5081
5082         if (!(reset_mask & (RADEON_RESET_GFX |
5083                             RADEON_RESET_COMPUTE |
5084                             RADEON_RESET_CP))) {
5085                 radeon_ring_lockup_update(ring);
5086                 return false;
5087         }
5088         /* force CP activities */
5089         radeon_ring_force_activity(rdev, ring);
5090         return radeon_ring_test_lockup(rdev, ring);
5091 }
5092
5093 /* MC */
5094 /**
5095  * cik_mc_program - program the GPU memory controller
5096  *
5097  * @rdev: radeon_device pointer
5098  *
5099  * Set the location of vram, gart, and AGP in the GPU's
5100  * physical address space (CIK).
5101  */
5102 static void cik_mc_program(struct radeon_device *rdev)
5103 {
5104         struct evergreen_mc_save save;
5105         u32 tmp;
5106         int i, j;
5107
5108         /* Initialize HDP */
5109         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5110                 WREG32((0x2c14 + j), 0x00000000);
5111                 WREG32((0x2c18 + j), 0x00000000);
5112                 WREG32((0x2c1c + j), 0x00000000);
5113                 WREG32((0x2c20 + j), 0x00000000);
5114                 WREG32((0x2c24 + j), 0x00000000);
5115         }
5116         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5117
5118         evergreen_mc_stop(rdev, &save);
5119         if (radeon_mc_wait_for_idle(rdev)) {
5120                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5121         }
5122         /* Lockout access through VGA aperture */
5123         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5124         /* Update configuration */
5125         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5126                rdev->mc.vram_start >> 12);
5127         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5128                rdev->mc.vram_end >> 12);
5129         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5130                rdev->vram_scratch.gpu_addr >> 12);
5131         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5132         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5133         WREG32(MC_VM_FB_LOCATION, tmp);
5134         /* XXX double check these! */
5135         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5136         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5137         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5138         WREG32(MC_VM_AGP_BASE, 0);
5139         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5140         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5141         if (radeon_mc_wait_for_idle(rdev)) {
5142                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5143         }
5144         evergreen_mc_resume(rdev, &save);
5145         /* we need to own VRAM, so turn off the VGA renderer here
5146          * to stop it from overwriting our objects */
5147         rv515_vga_render_disable(rdev);
5148 }
5149
5150 /**
5151  * cik_mc_init - initialize the memory controller driver params
5152  *
5153  * @rdev: radeon_device pointer
5154  *
5155  * Look up the amount of vram, vram width, and decide how to place
5156  * vram and gart within the GPU's physical address space (CIK).
5157  * Returns 0 for success.
5158  */
5159 static int cik_mc_init(struct radeon_device *rdev)
5160 {
5161         u32 tmp;
5162         int chansize, numchan;
5163
5164         /* Get VRAM information */
5165         rdev->mc.vram_is_ddr = true;
5166         tmp = RREG32(MC_ARB_RAMCFG);
5167         if (tmp & CHANSIZE_MASK) {
5168                 chansize = 64;
5169         } else {
5170                 chansize = 32;
5171         }
5172         tmp = RREG32(MC_SHARED_CHMAP);
5173         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5174         case 0:
5175         default:
5176                 numchan = 1;
5177                 break;
5178         case 1:
5179                 numchan = 2;
5180                 break;
5181         case 2:
5182                 numchan = 4;
5183                 break;
5184         case 3:
5185                 numchan = 8;
5186                 break;
5187         case 4:
5188                 numchan = 3;
5189                 break;
5190         case 5:
5191                 numchan = 6;
5192                 break;
5193         case 6:
5194                 numchan = 10;
5195                 break;
5196         case 7:
5197                 numchan = 12;
5198                 break;
5199         case 8:
5200                 numchan = 16;
5201                 break;
5202         }
5203         rdev->mc.vram_width = numchan * chansize;
5204         /* Could the aperture size report 0? */
5205         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5206         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5207         /* size in MB on CIK */
5208         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5209         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5210         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5211         si_vram_gtt_location(rdev, &rdev->mc);
5212         radeon_update_bandwidth_info(rdev);
5213
5214         return 0;
5215 }
5216
5217 /*
5218  * GART
5219  * VMID 0 is the physical GPU addresses as used by the kernel.
5220  * VMIDs 1-15 are used for userspace clients and are handled
5221  * by the radeon vm/hsa code.
5222  */
5223 /**
5224  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5225  *
5226  * @rdev: radeon_device pointer
5227  *
5228  * Flush the TLB for the VMID 0 page table (CIK).
5229  */
5230 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5231 {
5232         /* flush hdp cache */
5233         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5234
5235         /* bits 0-15 are the VM contexts 0-15 */
5236         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5237 }
5238
5239 /**
5240  * cik_pcie_gart_enable - gart enable
5241  *
5242  * @rdev: radeon_device pointer
5243  *
5244  * This sets up the TLBs, programs the page tables for VMID0,
5245  * sets up the hw for VMIDs 1-15 which are allocated on
5246  * demand, and sets up the global locations for the LDS, GDS,
5247  * and GPUVM for FSA64 clients (CIK).
5248  * Returns 0 for success, errors for failure.
5249  */
5250 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5251 {
5252         int r, i;
5253
5254         if (rdev->gart.robj == NULL) {
5255                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5256                 return -EINVAL;
5257         }
5258         r = radeon_gart_table_vram_pin(rdev);
5259         if (r)
5260                 return r;
5261         radeon_gart_restore(rdev);
5262         /* Setup TLB control */
5263         WREG32(MC_VM_MX_L1_TLB_CNTL,
5264                (0xA << 7) |
5265                ENABLE_L1_TLB |
5266                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5267                ENABLE_ADVANCED_DRIVER_MODEL |
5268                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5269         /* Setup L2 cache */
5270         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5271                ENABLE_L2_FRAGMENT_PROCESSING |
5272                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5273                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5274                EFFECTIVE_L2_QUEUE_SIZE(7) |
5275                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5276         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5277         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5278                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5279         /* setup context0 */
5280         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5281         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5282         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5283         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5284                         (u32)(rdev->dummy_page.addr >> 12));
5285         WREG32(VM_CONTEXT0_CNTL2, 0);
5286         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5287                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5288
5289         WREG32(0x15D4, 0);
5290         WREG32(0x15D8, 0);
5291         WREG32(0x15DC, 0);
5292
5293         /* empty context1-15 */
5294         /* FIXME start with 4G, once using 2 level pt switch to full
5295          * vm size space
5296          */
5297         /* set vm size, must be a multiple of 4 */
5298         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5299         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5300         for (i = 1; i < 16; i++) {
5301                 if (i < 8)
5302                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5303                                rdev->gart.table_addr >> 12);
5304                 else
5305                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5306                                rdev->gart.table_addr >> 12);
5307         }
5308
5309         /* enable context1-15 */
5310         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5311                (u32)(rdev->dummy_page.addr >> 12));
5312         WREG32(VM_CONTEXT1_CNTL2, 4);
5313         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5314                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5315                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5316                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5317                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5318                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5319                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5320                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5321                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5322                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5323                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5324                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5325                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5326
5327         /* TC cache setup ??? */
5328         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
5329         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
5330         WREG32(TC_CFG_L1_STORE_POLICY, 0);
5331
5332         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
5333         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
5334         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
5335         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
5336         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
5337
5338         WREG32(TC_CFG_L1_VOLATILE, 0);
5339         WREG32(TC_CFG_L2_VOLATILE, 0);
5340
5341         if (rdev->family == CHIP_KAVERI) {
5342                 u32 tmp = RREG32(CHUB_CONTROL);
5343                 tmp &= ~BYPASS_VM;
5344                 WREG32(CHUB_CONTROL, tmp);
5345         }
5346
5347         /* XXX SH_MEM regs */
5348         /* where to put LDS, scratch, GPUVM in FSA64 space */
5349         mutex_lock(&rdev->srbm_mutex);
5350         for (i = 0; i < 16; i++) {
5351                 cik_srbm_select(rdev, 0, 0, 0, i);
5352                 /* CP and shaders */
5353                 WREG32(SH_MEM_CONFIG, 0);
5354                 WREG32(SH_MEM_APE1_BASE, 1);
5355                 WREG32(SH_MEM_APE1_LIMIT, 0);
5356                 WREG32(SH_MEM_BASES, 0);
5357                 /* SDMA GFX */
5358                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5359                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5360                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5361                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5362                 /* XXX SDMA RLC - todo */
5363         }
5364         cik_srbm_select(rdev, 0, 0, 0, 0);
5365         mutex_unlock(&rdev->srbm_mutex);
5366
5367         cik_pcie_gart_tlb_flush(rdev);
5368         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5369                  (unsigned)(rdev->mc.gtt_size >> 20),
5370                  (unsigned long long)rdev->gart.table_addr);
5371         rdev->gart.ready = true;
5372         return 0;
5373 }
5374
5375 /**
5376  * cik_pcie_gart_disable - gart disable
5377  *
5378  * @rdev: radeon_device pointer
5379  *
5380  * This disables all VM page tables (CIK).
5381  */
5382 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5383 {
5384         /* Disable all tables */
5385         WREG32(VM_CONTEXT0_CNTL, 0);
5386         WREG32(VM_CONTEXT1_CNTL, 0);
5387         /* Setup TLB control */
5388         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5389                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5390         /* Setup L2 cache */
5391         WREG32(VM_L2_CNTL,
5392                ENABLE_L2_FRAGMENT_PROCESSING |
5393                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5394                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5395                EFFECTIVE_L2_QUEUE_SIZE(7) |
5396                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5397         WREG32(VM_L2_CNTL2, 0);
5398         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5399                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5400         radeon_gart_table_vram_unpin(rdev);
5401 }
5402
5403 /**
5404  * cik_pcie_gart_fini - vm fini callback
5405  *
5406  * @rdev: radeon_device pointer
5407  *
5408  * Tears down the driver GART/VM setup (CIK).
5409  */
5410 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5411 {
5412         cik_pcie_gart_disable(rdev);
5413         radeon_gart_table_vram_free(rdev);
5414         radeon_gart_fini(rdev);
5415 }
5416
5417 /* vm parser */
5418 /**
5419  * cik_ib_parse - vm ib_parse callback
5420  *
5421  * @rdev: radeon_device pointer
5422  * @ib: indirect buffer pointer
5423  *
5424  * CIK uses hw IB checking so this is a nop (CIK).
5425  */
5426 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5427 {
5428         return 0;
5429 }
5430
5431 /*
5432  * vm
5433  * VMID 0 is the physical GPU addresses as used by the kernel.
5434  * VMIDs 1-15 are used for userspace clients and are handled
5435  * by the radeon vm/hsa code.
5436  */
5437 /**
5438  * cik_vm_init - cik vm init callback
5439  *
5440  * @rdev: radeon_device pointer
5441  *
5442  * Inits cik specific vm parameters (number of VMs, base of vram for
5443  * VMIDs 1-15) (CIK).
5444  * Returns 0 for success.
5445  */
5446 int cik_vm_init(struct radeon_device *rdev)
5447 {
5448         /* number of VMs */
5449         rdev->vm_manager.nvm = 16;
5450         /* base offset of vram pages */
5451         if (rdev->flags & RADEON_IS_IGP) {
5452                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5453                 tmp <<= 22;
5454                 rdev->vm_manager.vram_base_offset = tmp;
5455         } else
5456                 rdev->vm_manager.vram_base_offset = 0;
5457
5458         return 0;
5459 }
5460
5461 /**
5462  * cik_vm_fini - cik vm fini callback
5463  *
5464  * @rdev: radeon_device pointer
5465  *
5466  * Tear down any asic specific VM setup (CIK).
5467  */
5468 void cik_vm_fini(struct radeon_device *rdev)
5469 {
5470 }
5471
5472 /**
5473  * cik_vm_decode_fault - print human readable fault info
5474  *
5475  * @rdev: radeon_device pointer
5476  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5477  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5478  *
5479  * Print human readable fault information (CIK).
5480  */
5481 static void cik_vm_decode_fault(struct radeon_device *rdev,
5482                                 u32 status, u32 addr, u32 mc_client)
5483 {
5484         u32 mc_id;
5485         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5486         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5487         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5488                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5489
5490         if (rdev->family == CHIP_HAWAII)
5491                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5492         else
5493                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5494
5495         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5496                protections, vmid, addr,
5497                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5498                block, mc_client, mc_id);
5499 }
5500
5501 /**
5502  * cik_vm_flush - cik vm flush using the CP
5503  *
5504  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
5505  *
5506  * Update the page table base and flush the VM TLB
5507  * using the CP (CIK).
5508  */
5509 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5510 {
5511         struct radeon_ring *ring = &rdev->ring[ridx];
5512
5513         if (vm == NULL)
5514                 return;
5515
5516         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5517         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5518                                  WRITE_DATA_DST_SEL(0)));
5519         if (vm->id < 8) {
5520                 radeon_ring_write(ring,
5521                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5522         } else {
5523                 radeon_ring_write(ring,
5524                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5525         }
5526         radeon_ring_write(ring, 0);
5527         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5528
5529         /* update SH_MEM_* regs */
5530         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5531         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5532                                  WRITE_DATA_DST_SEL(0)));
5533         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5534         radeon_ring_write(ring, 0);
5535         radeon_ring_write(ring, VMID(vm->id));
5536
5537         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5538         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5539                                  WRITE_DATA_DST_SEL(0)));
5540         radeon_ring_write(ring, SH_MEM_BASES >> 2);
5541         radeon_ring_write(ring, 0);
5542
5543         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5544         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5545         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5546         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5547
5548         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5549         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5550                                  WRITE_DATA_DST_SEL(0)));
5551         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5552         radeon_ring_write(ring, 0);
5553         radeon_ring_write(ring, VMID(0));
5554
5555         /* HDP flush */
5556         /* We should be using the WAIT_REG_MEM packet here like in
5557          * cik_fence_ring_emit(), but it causes the CP to hang in this
5558          * context...
5559          */
5560         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5561         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5562                                  WRITE_DATA_DST_SEL(0)));
5563         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5564         radeon_ring_write(ring, 0);
5565         radeon_ring_write(ring, 0);
5566
5567         /* bits 0-15 are the VM contexts 0-15 */
5568         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5569         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5570                                  WRITE_DATA_DST_SEL(0)));
5571         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5572         radeon_ring_write(ring, 0);
5573         radeon_ring_write(ring, 1 << vm->id);
5574
5575         /* compute doesn't have PFP */
5576         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5577                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5578                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5579                 radeon_ring_write(ring, 0x0);
5580         }
5581 }
5582
5583 /*
5584  * RLC
5585  * The RLC is a multi-purpose microengine that handles a
5586  * variety of functions, the most important of which is
5587  * the interrupt controller.
5588  */
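/* Enable or disable the context busy/empty interrupts in CP_INT_CNTL_RING0
 * used for gui idle signalling.
 */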
5589 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5590                                           bool enable)
5591 {
5592         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5593
5594         if (enable)
5595                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5596         else
5597                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5598         WREG32(CP_INT_CNTL_RING0, tmp);
5599 }
5600
5601 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5602 {
5603         u32 tmp;
5604
5605         tmp = RREG32(RLC_LB_CNTL);
5606         if (enable)
5607                 tmp |= LOAD_BALANCE_ENABLE;
5608         else
5609                 tmp &= ~LOAD_BALANCE_ENABLE;
5610         WREG32(RLC_LB_CNTL, tmp);
5611 }
5612
5613 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5614 {
5615         u32 i, j, k;
5616         u32 mask;
5617
5618         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5619                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5620                         cik_select_se_sh(rdev, i, j);
5621                         for (k = 0; k < rdev->usec_timeout; k++) {
5622                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5623                                         break;
5624                                 udelay(1);
5625                         }
5626                 }
5627         }
5628         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5629
5630         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5631         for (k = 0; k < rdev->usec_timeout; k++) {
5632                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5633                         break;
5634                 udelay(1);
5635         }
5636 }
5637
5638 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5639 {
5640         u32 tmp;
5641
5642         tmp = RREG32(RLC_CNTL);
5643         if (tmp != rlc)
5644                 WREG32(RLC_CNTL, rlc);
5645 }
5646
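/* Disable the RLC and wait for the GPM and serdes masters to go idle;
 * returns the previous RLC_CNTL value so callers can restore it via
 * cik_update_rlc().
 */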
5647 static u32 cik_halt_rlc(struct radeon_device *rdev)
5648 {
5649         u32 data, orig;
5650
5651         orig = data = RREG32(RLC_CNTL);
5652
5653         if (data & RLC_ENABLE) {
5654                 u32 i;
5655
5656                 data &= ~RLC_ENABLE;
5657                 WREG32(RLC_CNTL, data);
5658
5659                 for (i = 0; i < rdev->usec_timeout; i++) {
5660                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5661                                 break;
5662                         udelay(1);
5663                 }
5664
5665                 cik_wait_for_rlc_serdes(rdev);
5666         }
5667
5668         return orig;
5669 }
5670
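/* Request RLC safe mode via RLC_GPR_REG2 and wait for the gfx power and
 * clock status bits to assert and for the request bit to clear.
 */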
5671 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5672 {
5673         u32 tmp, i, mask;
5674
5675         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5676         WREG32(RLC_GPR_REG2, tmp);
5677
5678         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5679         for (i = 0; i < rdev->usec_timeout; i++) {
5680                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5681                         break;
5682                 udelay(1);
5683         }
5684
5685         for (i = 0; i < rdev->usec_timeout; i++) {
5686                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5687                         break;
5688                 udelay(1);
5689         }
5690 }
5691
5692 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5693 {
5694         u32 tmp;
5695
5696         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5697         WREG32(RLC_GPR_REG2, tmp);
5698 }
5699
5700 /**
5701  * cik_rlc_stop - stop the RLC ME
5702  *
5703  * @rdev: radeon_device pointer
5704  *
5705  * Halt the RLC ME (MicroEngine) (CIK).
5706  */
5707 static void cik_rlc_stop(struct radeon_device *rdev)
5708 {
5709         WREG32(RLC_CNTL, 0);
5710
5711         cik_enable_gui_idle_interrupt(rdev, false);
5712
5713         cik_wait_for_rlc_serdes(rdev);
5714 }
5715
5716 /**
5717  * cik_rlc_start - start the RLC ME
5718  *
5719  * @rdev: radeon_device pointer
5720  *
5721  * Unhalt the RLC ME (MicroEngine) (CIK).
5722  */
5723 static void cik_rlc_start(struct radeon_device *rdev)
5724 {
5725         WREG32(RLC_CNTL, RLC_ENABLE);
5726
5727         cik_enable_gui_idle_interrupt(rdev, true);
5728
5729         udelay(50);
5730 }
5731
5732 /**
5733  * cik_rlc_resume - setup the RLC hw
5734  *
5735  * @rdev: radeon_device pointer
5736  *
5737  * Initialize the RLC registers, load the ucode,
5738  * and start the RLC (CIK).
5739  * Returns 0 for success, -EINVAL if the ucode is not available.
5740  */
5741 static int cik_rlc_resume(struct radeon_device *rdev)
5742 {
5743         u32 i, size, tmp;
5744         const __be32 *fw_data;
5745
5746         if (!rdev->rlc_fw)
5747                 return -EINVAL;
5748
5749         switch (rdev->family) {
5750         case CHIP_BONAIRE:
5751         case CHIP_HAWAII:
5752         default:
5753                 size = BONAIRE_RLC_UCODE_SIZE;
5754                 break;
5755         case CHIP_KAVERI:
5756                 size = KV_RLC_UCODE_SIZE;
5757                 break;
5758         case CHIP_KABINI:
5759                 size = KB_RLC_UCODE_SIZE;
5760                 break;
5761         }
5762
5763         cik_rlc_stop(rdev);
5764
5765         /* disable CG */
5766         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5767         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5768
5769         si_rlc_reset(rdev);
5770
5771         cik_init_pg(rdev);
5772
5773         cik_init_cg(rdev);
5774
5775         WREG32(RLC_LB_CNTR_INIT, 0);
5776         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5777
5778         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5779         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5780         WREG32(RLC_LB_PARAMS, 0x00600408);
5781         WREG32(RLC_LB_CNTL, 0x80000004);
5782
5783         WREG32(RLC_MC_CNTL, 0);
5784         WREG32(RLC_UCODE_CNTL, 0);
5785
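        /* load the RLC ucode one dword at a time through the GPM ucode port */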
5786         fw_data = (const __be32 *)rdev->rlc_fw->data;
5787         WREG32(RLC_GPM_UCODE_ADDR, 0);
5788         for (i = 0; i < size; i++)
5789                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5790         WREG32(RLC_GPM_UCODE_ADDR, 0);
5791
5792         /* XXX - find out what chips support lbpw */
5793         cik_enable_lbpw(rdev, false);
5794
5795         if (rdev->family == CHIP_BONAIRE)
5796                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5797
5798         cik_rlc_start(rdev);
5799
5800         return 0;
5801 }
5802
5803 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5804 {
5805         u32 data, orig, tmp, tmp2;
5806
5807         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5808
5809         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5810                 cik_enable_gui_idle_interrupt(rdev, true);
5811
5812                 tmp = cik_halt_rlc(rdev);
5813
5814                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5815                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5816                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5817                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5818                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5819
5820                 cik_update_rlc(rdev, tmp);
5821
5822                 data |= CGCG_EN | CGLS_EN;
5823         } else {
5824                 cik_enable_gui_idle_interrupt(rdev, false);
5825
5826                 RREG32(CB_CGTT_SCLK_CTRL);
5827                 RREG32(CB_CGTT_SCLK_CTRL);
5828                 RREG32(CB_CGTT_SCLK_CTRL);
5829                 RREG32(CB_CGTT_SCLK_CTRL);
5830
5831                 data &= ~(CGCG_EN | CGLS_EN);
5832         }
5833
5834         if (orig != data)
5835                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5836
5837 }
5838
5839 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5840 {
5841         u32 data, orig, tmp = 0;
5842
5843         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5844                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5845                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5846                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
5847                                 data |= CP_MEM_LS_EN;
5848                                 if (orig != data)
5849                                         WREG32(CP_MEM_SLP_CNTL, data);
5850                         }
5851                 }
5852
5853                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5854                 data &= 0xfffffffd;
5855                 if (orig != data)
5856                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5857
5858                 tmp = cik_halt_rlc(rdev);
5859
5860                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5861                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5862                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5863                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5864                 WREG32(RLC_SERDES_WR_CTRL, data);
5865
5866                 cik_update_rlc(rdev, tmp);
5867
5868                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5869                         orig = data = RREG32(CGTS_SM_CTRL_REG);
5870                         data &= ~SM_MODE_MASK;
5871                         data |= SM_MODE(0x2);
5872                         data |= SM_MODE_ENABLE;
5873                         data &= ~CGTS_OVERRIDE;
5874                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5875                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5876                                 data &= ~CGTS_LS_OVERRIDE;
5877                         data &= ~ON_MONITOR_ADD_MASK;
5878                         data |= ON_MONITOR_ADD_EN;
5879                         data |= ON_MONITOR_ADD(0x96);
5880                         if (orig != data)
5881                                 WREG32(CGTS_SM_CTRL_REG, data);
5882                 }
5883         } else {
5884                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5885                 data |= 0x00000002;
5886                 if (orig != data)
5887                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5888
5889                 data = RREG32(RLC_MEM_SLP_CNTL);
5890                 if (data & RLC_MEM_LS_EN) {
5891                         data &= ~RLC_MEM_LS_EN;
5892                         WREG32(RLC_MEM_SLP_CNTL, data);
5893                 }
5894
5895                 data = RREG32(CP_MEM_SLP_CNTL);
5896                 if (data & CP_MEM_LS_EN) {
5897                         data &= ~CP_MEM_LS_EN;
5898                         WREG32(CP_MEM_SLP_CNTL, data);
5899                 }
5900
5901                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5902                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5903                 if (orig != data)
5904                         WREG32(CGTS_SM_CTRL_REG, data);
5905
5906                 tmp = cik_halt_rlc(rdev);
5907
5908                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5909                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5910                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5911                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5912                 WREG32(RLC_SERDES_WR_CTRL, data);
5913
5914                 cik_update_rlc(rdev, tmp);
5915         }
5916 }
5917
5918 static const u32 mc_cg_registers[] =
5919 {
5920         MC_HUB_MISC_HUB_CG,
5921         MC_HUB_MISC_SIP_CG,
5922         MC_HUB_MISC_VM_CG,
5923         MC_XPB_CLK_GAT,
5924         ATC_MISC_CG,
5925         MC_CITF_MISC_WR_CG,
5926         MC_CITF_MISC_RD_CG,
5927         MC_CITF_MISC_VM_CG,
5928         VM_L2_CG,
5929 };
5930
5931 static void cik_enable_mc_ls(struct radeon_device *rdev,
5932                              bool enable)
5933 {
5934         int i;
5935         u32 orig, data;
5936
5937         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5938                 orig = data = RREG32(mc_cg_registers[i]);
5939                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5940                         data |= MC_LS_ENABLE;
5941                 else
5942                         data &= ~MC_LS_ENABLE;
5943                 if (data != orig)
5944                         WREG32(mc_cg_registers[i], data);
5945         }
5946 }
5947
5948 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5949                                bool enable)
5950 {
5951         int i;
5952         u32 orig, data;
5953
5954         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5955                 orig = data = RREG32(mc_cg_registers[i]);
5956                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5957                         data |= MC_CG_ENABLE;
5958                 else
5959                         data &= ~MC_CG_ENABLE;
5960                 if (data != orig)
5961                         WREG32(mc_cg_registers[i], data);
5962         }
5963 }
5964
5965 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5966                                  bool enable)
5967 {
5968         u32 orig, data;
5969
5970         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5971                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5972                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5973         } else {
5974                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5975                 data |= 0xff000000;
5976                 if (data != orig)
5977                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5978
5979                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5980                 data |= 0xff000000;
5981                 if (data != orig)
5982                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5983         }
5984 }
5985
5986 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5987                                  bool enable)
5988 {
5989         u32 orig, data;
5990
5991         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5992                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5993                 data |= 0x100;
5994                 if (orig != data)
5995                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5996
5997                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5998                 data |= 0x100;
5999                 if (orig != data)
6000                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6001         } else {
6002                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6003                 data &= ~0x100;
6004                 if (orig != data)
6005                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6006
6007                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6008                 data &= ~0x100;
6009                 if (orig != data)
6010                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6011         }
6012 }
6013
6014 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6015                                 bool enable)
6016 {
6017         u32 orig, data;
6018
6019         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6020                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
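                /* note: the value read above is discarded; the whole memory
                 * clock gating field is forced on here
                 */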
6021                 data = 0xfff;
6022                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6023
6024                 orig = data = RREG32(UVD_CGC_CTRL);
6025                 data |= DCM;
6026                 if (orig != data)
6027                         WREG32(UVD_CGC_CTRL, data);
6028         } else {
6029                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6030                 data &= ~0xfff;
6031                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6032
6033                 orig = data = RREG32(UVD_CGC_CTRL);
6034                 data &= ~DCM;
6035                 if (orig != data)
6036                         WREG32(UVD_CGC_CTRL, data);
6037         }
6038 }
6039
6040 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6041                                bool enable)
6042 {
6043         u32 orig, data;
6044
6045         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6046
6047         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6048                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6049                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6050         else
6051                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6052                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6053
6054         if (orig != data)
6055                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6056 }
6057
6058 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6059                                 bool enable)
6060 {
6061         u32 orig, data;
6062
6063         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6064
6065         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6066                 data &= ~CLOCK_GATING_DIS;
6067         else
6068                 data |= CLOCK_GATING_DIS;
6069
6070         if (orig != data)
6071                 WREG32(HDP_HOST_PATH_CNTL, data);
6072 }
6073
6074 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6075                               bool enable)
6076 {
6077         u32 orig, data;
6078
6079         orig = data = RREG32(HDP_MEM_POWER_LS);
6080
6081         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6082                 data |= HDP_LS_ENABLE;
6083         else
6084                 data &= ~HDP_LS_ENABLE;
6085
6086         if (orig != data)
6087                 WREG32(HDP_MEM_POWER_LS, data);
6088 }
6089
6090 void cik_update_cg(struct radeon_device *rdev,
6091                    u32 block, bool enable)
6092 {
6093
6094         if (block & RADEON_CG_BLOCK_GFX) {
6095                 cik_enable_gui_idle_interrupt(rdev, false);
6096                 /* order matters! */
6097                 if (enable) {
6098                         cik_enable_mgcg(rdev, true);
6099                         cik_enable_cgcg(rdev, true);
6100                 } else {
6101                         cik_enable_cgcg(rdev, false);
6102                         cik_enable_mgcg(rdev, false);
6103                 }
6104                 cik_enable_gui_idle_interrupt(rdev, true);
6105         }
6106
6107         if (block & RADEON_CG_BLOCK_MC) {
6108                 if (!(rdev->flags & RADEON_IS_IGP)) {
6109                         cik_enable_mc_mgcg(rdev, enable);
6110                         cik_enable_mc_ls(rdev, enable);
6111                 }
6112         }
6113
6114         if (block & RADEON_CG_BLOCK_SDMA) {
6115                 cik_enable_sdma_mgcg(rdev, enable);
6116                 cik_enable_sdma_mgls(rdev, enable);
6117         }
6118
6119         if (block & RADEON_CG_BLOCK_BIF) {
6120                 cik_enable_bif_mgls(rdev, enable);
6121         }
6122
6123         if (block & RADEON_CG_BLOCK_UVD) {
6124                 if (rdev->has_uvd)
6125                         cik_enable_uvd_mgcg(rdev, enable);
6126         }
6127
6128         if (block & RADEON_CG_BLOCK_HDP) {
6129                 cik_enable_hdp_mgcg(rdev, enable);
6130                 cik_enable_hdp_ls(rdev, enable);
6131         }
6132 }
6133
6134 static void cik_init_cg(struct radeon_device *rdev)
6135 {
6136
6137         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6138
6139         if (rdev->has_uvd)
6140                 si_init_uvd_internal_cg(rdev);
6141
6142         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6143                              RADEON_CG_BLOCK_SDMA |
6144                              RADEON_CG_BLOCK_BIF |
6145                              RADEON_CG_BLOCK_UVD |
6146                              RADEON_CG_BLOCK_HDP), true);
6147 }
6148
6149 static void cik_fini_cg(struct radeon_device *rdev)
6150 {
6151         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6152                              RADEON_CG_BLOCK_SDMA |
6153                              RADEON_CG_BLOCK_BIF |
6154                              RADEON_CG_BLOCK_UVD |
6155                              RADEON_CG_BLOCK_HDP), false);
6156
6157         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6158 }
6159
6160 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6161                                           bool enable)
6162 {
6163         u32 data, orig;
6164
6165         orig = data = RREG32(RLC_PG_CNTL);
6166         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6167                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6168         else
6169                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6170         if (orig != data)
6171                 WREG32(RLC_PG_CNTL, data);
6172 }
6173
6174 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6175                                           bool enable)
6176 {
6177         u32 data, orig;
6178
6179         orig = data = RREG32(RLC_PG_CNTL);
6180         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6181                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6182         else
6183                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6184         if (orig != data)
6185                 WREG32(RLC_PG_CNTL, data);
6186 }
6187
6188 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6189 {
6190         u32 data, orig;
6191
6192         orig = data = RREG32(RLC_PG_CNTL);
6193         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6194                 data &= ~DISABLE_CP_PG;
6195         else
6196                 data |= DISABLE_CP_PG;
6197         if (orig != data)
6198                 WREG32(RLC_PG_CNTL, data);
6199 }
6200
6201 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6202 {
6203         u32 data, orig;
6204
6205         orig = data = RREG32(RLC_PG_CNTL);
6206         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6207                 data &= ~DISABLE_GDS_PG;
6208         else
6209                 data |= DISABLE_GDS_PG;
6210         if (orig != data)
6211                 WREG32(RLC_PG_CNTL, data);
6212 }
6213
6214 #define CP_ME_TABLE_SIZE    96
6215 #define CP_ME_TABLE_OFFSET  2048
6216 #define CP_MEC_TABLE_OFFSET 4096
6217
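/* Copy the CE, PFP, ME and MEC register tables out of the ucode images
 * into the CP power gating table buffer consumed by the RLC
 * (see RLC_CP_TABLE_RESTORE in cik_init_gfx_cgpg()).
 */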
6218 void cik_init_cp_pg_table(struct radeon_device *rdev)
6219 {
6220         const __be32 *fw_data;
6221         volatile u32 *dst_ptr;
6222         int me, i, max_me = 4;
6223         u32 bo_offset = 0;
6224         u32 table_offset;
6225
6226         if (rdev->family == CHIP_KAVERI)
6227                 max_me = 5;
6228
6229         if (rdev->rlc.cp_table_ptr == NULL)
6230                 return;
6231
6232         /* write the cp table buffer */
6233         dst_ptr = rdev->rlc.cp_table_ptr;
6234         for (me = 0; me < max_me; me++) {
6235                 if (me == 0) {
6236                         fw_data = (const __be32 *)rdev->ce_fw->data;
6237                         table_offset = CP_ME_TABLE_OFFSET;
6238                 } else if (me == 1) {
6239                         fw_data = (const __be32 *)rdev->pfp_fw->data;
6240                         table_offset = CP_ME_TABLE_OFFSET;
6241                 } else if (me == 2) {
6242                         fw_data = (const __be32 *)rdev->me_fw->data;
6243                         table_offset = CP_ME_TABLE_OFFSET;
6244                 } else {
6245                         fw_data = (const __be32 *)rdev->mec_fw->data;
6246                         table_offset = CP_MEC_TABLE_OFFSET;
6247                 }
6248
6249                 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
6250                         dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6251                 }
6252                 bo_offset += CP_ME_TABLE_SIZE;
6253         }
6254 }
6255
6256 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6257                                 bool enable)
6258 {
6259         u32 data, orig;
6260
6261         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6262                 orig = data = RREG32(RLC_PG_CNTL);
6263                 data |= GFX_PG_ENABLE;
6264                 if (orig != data)
6265                         WREG32(RLC_PG_CNTL, data);
6266
6267                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6268                 data |= AUTO_PG_EN;
6269                 if (orig != data)
6270                         WREG32(RLC_AUTO_PG_CTRL, data);
6271         } else {
6272                 orig = data = RREG32(RLC_PG_CNTL);
6273                 data &= ~GFX_PG_ENABLE;
6274                 if (orig != data)
6275                         WREG32(RLC_PG_CNTL, data);
6276
6277                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6278                 data &= ~AUTO_PG_EN;
6279                 if (orig != data)
6280                         WREG32(RLC_AUTO_PG_CTRL, data);
6281
6282                 data = RREG32(DB_RENDER_CONTROL);
6283         }
6284 }
6285
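/* Return a bitmap of the active (non-harvested) CUs for the given SE/SH,
 * derived from CC_GC_SHADER_ARRAY_CONFIG and GC_USER_SHADER_ARRAY_CONFIG.
 */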
6286 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6287 {
6288         u32 mask = 0, tmp, tmp1;
6289         int i;
6290
6291         cik_select_se_sh(rdev, se, sh);
6292         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6293         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6294         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6295
6296         tmp &= 0xffff0000;
6297
6298         tmp |= tmp1;
6299         tmp >>= 16;
6300
6301         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6302                 mask <<= 1;
6303                 mask |= 1;
6304         }
6305
6306         return (~tmp) & mask;
6307 }
6308
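/* Mark up to two CUs per SH as always-on in RLC_PG_AO_CU_MASK and
 * program RLC_MAX_PG_CU with the total number of active CUs.
 */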
6309 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6310 {
6311         u32 i, j, k, active_cu_number = 0;
6312         u32 mask, counter, cu_bitmap;
6313         u32 tmp = 0;
6314
6315         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6316                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6317                         mask = 1;
6318                         cu_bitmap = 0;
6319                         counter = 0;
6320                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6321                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6322                                         if (counter < 2)
6323                                                 cu_bitmap |= mask;
6324                                         counter ++;
6325                                 }
6326                                 mask <<= 1;
6327                         }
6328
6329                         active_cu_number += counter;
6330                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6331                 }
6332         }
6333
6334         WREG32(RLC_PG_AO_CU_MASK, tmp);
6335
6336         tmp = RREG32(RLC_MAX_PG_CU);
6337         tmp &= ~MAX_PU_CU_MASK;
6338         tmp |= MAX_PU_CU(active_cu_number);
6339         WREG32(RLC_MAX_PG_CU, tmp);
6340 }
6341
6342 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6343                                        bool enable)
6344 {
6345         u32 data, orig;
6346
6347         orig = data = RREG32(RLC_PG_CNTL);
6348         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6349                 data |= STATIC_PER_CU_PG_ENABLE;
6350         else
6351                 data &= ~STATIC_PER_CU_PG_ENABLE;
6352         if (orig != data)
6353                 WREG32(RLC_PG_CNTL, data);
6354 }
6355
6356 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6357                                         bool enable)
6358 {
6359         u32 data, orig;
6360
6361         orig = data = RREG32(RLC_PG_CNTL);
6362         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6363                 data |= DYN_PER_CU_PG_ENABLE;
6364         else
6365                 data &= ~DYN_PER_CU_PG_ENABLE;
6366         if (orig != data)
6367                 WREG32(RLC_PG_CNTL, data);
6368 }
6369
6370 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6371 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6372
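/**
 * cik_init_gfx_cgpg - set up the RLC for gfx powergating
 *
 * @rdev: radeon_device pointer
 *
 * Writes the clear state descriptor and the save/restore register
 * list into the RLC scratch space, points the RLC at the save/restore
 * and CP table buffers, and programs the powergating delay and idle
 * threshold values (CIK).
 */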
6373 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6374 {
6375         u32 data, orig;
6376         u32 i;
6377
6378         if (rdev->rlc.cs_data) {
6379                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6380                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6381                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6382                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6383         } else {
6384                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6385                 for (i = 0; i < 3; i++)
6386                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
6387         }
6388         if (rdev->rlc.reg_list) {
6389                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6390                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6391                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6392         }
6393
6394         orig = data = RREG32(RLC_PG_CNTL);
6395         data |= GFX_PG_SRC;
6396         if (orig != data)
6397                 WREG32(RLC_PG_CNTL, data);
6398
6399         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6400         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6401
6402         data = RREG32(CP_RB_WPTR_POLL_CNTL);
6403         data &= ~IDLE_POLL_COUNT_MASK;
6404         data |= IDLE_POLL_COUNT(0x60);
6405         WREG32(CP_RB_WPTR_POLL_CNTL, data);
6406
6407         data = 0x10101010;
6408         WREG32(RLC_PG_DELAY, data);
6409
6410         data = RREG32(RLC_PG_DELAY_2);
6411         data &= ~0xff;
6412         data |= 0x3;
6413         WREG32(RLC_PG_DELAY_2, data);
6414
6415         data = RREG32(RLC_AUTO_PG_CTRL);
6416         data &= ~GRBM_REG_SGIT_MASK;
6417         data |= GRBM_REG_SGIT(0x700);
6418         WREG32(RLC_AUTO_PG_CTRL, data);
6419
6420 }
6421
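/**
 * cik_update_gfx_pg - enable/disable all gfx powergating features
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable powergating
 *
 * Enables or disables gfx powergating along with static and dynamic
 * per-CU powergating (CIK).
 */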
6422 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6423 {
6424         cik_enable_gfx_cgpg(rdev, enable);
6425         cik_enable_gfx_static_mgpg(rdev, enable);
6426         cik_enable_gfx_dynamic_mgpg(rdev, enable);
6427 }
6428
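/**
 * cik_get_csb_size - get the size of the clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Returns the number of dwords needed for the clear state buffer
 * filled in by cik_get_csb_buffer(), or 0 if there is no cs data (CIK).
 */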
6429 u32 cik_get_csb_size(struct radeon_device *rdev)
6430 {
6431         u32 count = 0;
6432         const struct cs_section_def *sect = NULL;
6433         const struct cs_extent_def *ext = NULL;
6434
6435         if (rdev->rlc.cs_data == NULL)
6436                 return 0;
6437
6438         /* begin clear state */
6439         count += 2;
6440         /* context control state */
6441         count += 3;
6442
6443         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6444                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6445                         if (sect->id == SECT_CONTEXT)
6446                                 count += 2 + ext->reg_count;
6447                         else
6448                                 return 0;
6449                 }
6450         }
6451         /* pa_sc_raster_config/pa_sc_raster_config1 */
6452         count += 4;
6453         /* end clear state */
6454         count += 2;
6455         /* clear state */
6456         count += 2;
6457
6458         return count;
6459 }
6460
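/**
 * cik_get_csb_buffer - fill in the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: buffer to fill (cik_get_csb_size() dwords)
 *
 * Emits the PM4 packets that make up the clear state: the preamble,
 * context control, the SECT_CONTEXT register extents, the per-asic
 * raster config values and the final clear state packet (CIK).
 */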
6461 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6462 {
6463         u32 count = 0, i;
6464         const struct cs_section_def *sect = NULL;
6465         const struct cs_extent_def *ext = NULL;
6466
6467         if (rdev->rlc.cs_data == NULL)
6468                 return;
6469         if (buffer == NULL)
6470                 return;
6471
6472         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6473         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6474
6475         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6476         buffer[count++] = cpu_to_le32(0x80000000);
6477         buffer[count++] = cpu_to_le32(0x80000000);
6478
6479         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6480                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6481                         if (sect->id == SECT_CONTEXT) {
6482                                 buffer[count++] =
6483                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6484                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6485                                 for (i = 0; i < ext->reg_count; i++)
6486                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6487                         } else {
6488                                 return;
6489                         }
6490                 }
6491         }
6492
6493         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6494         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6495         switch (rdev->family) {
6496         case CHIP_BONAIRE:
6497                 buffer[count++] = cpu_to_le32(0x16000012);
6498                 buffer[count++] = cpu_to_le32(0x00000000);
6499                 break;
6500         case CHIP_KAVERI:
6501                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6502                 buffer[count++] = cpu_to_le32(0x00000000);
6503                 break;
6504         case CHIP_KABINI:
6505                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6506                 buffer[count++] = cpu_to_le32(0x00000000);
6507                 break;
6508         case CHIP_HAWAII:
6509                 buffer[count++] = cpu_to_le32(0x3a00161a);
6510                 buffer[count++] = cpu_to_le32(0x0000002e);
6511                 break;
6512         default:
6513                 buffer[count++] = cpu_to_le32(0x00000000);
6514                 buffer[count++] = cpu_to_le32(0x00000000);
6515                 break;
6516         }
6517
6518         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6519         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6520
6521         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6522         buffer[count++] = cpu_to_le32(0);
6523 }
6524
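/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * Enables clock slowdown on power up/down and, where supported,
 * gfx, CP and GDS powergating, then programs the always-on CU
 * mask (CIK).
 */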
6525 static void cik_init_pg(struct radeon_device *rdev)
6526 {
6527         if (rdev->pg_flags) {
6528                 cik_enable_sck_slowdown_on_pu(rdev, true);
6529                 cik_enable_sck_slowdown_on_pd(rdev, true);
6530                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6531                         cik_init_gfx_cgpg(rdev);
6532                         cik_enable_cp_pg(rdev, true);
6533                         cik_enable_gds_pg(rdev, true);
6534                 }
6535                 cik_init_ao_cu_mask(rdev);
6536                 cik_update_gfx_pg(rdev, true);
6537         }
6538 }
6539
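/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disables gfx powergating and, where it was enabled, CP and GDS
 * powergating (CIK).
 */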
6540 static void cik_fini_pg(struct radeon_device *rdev)
6541 {
6542         if (rdev->pg_flags) {
6543                 cik_update_gfx_pg(rdev, false);
6544                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6545                         cik_enable_cp_pg(rdev, false);
6546                         cik_enable_gds_pg(rdev, false);
6547                 }
6548         }
6549 }
6550
6551 /*
6552  * Interrupts
6553  * Starting with r6xx, interrupts are handled via a ring buffer.
6554  * Ring buffers are areas of GPU accessible memory that the GPU
6555  * writes interrupt vectors into and the host reads vectors out of.
6556  * There is a rptr (read pointer) that determines where the
6557  * host is currently reading, and a wptr (write pointer)
6558  * which determines where the GPU has written.  When the
6559  * pointers are equal, the ring is idle.  When the GPU
6560  * writes vectors to the ring buffer, it increments the
6561  * wptr.  When there is an interrupt, the host then starts
6562  * fetching commands and processing them until the pointers are
6563  * equal again at which point it updates the rptr.
6564  */
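
/* A rough sketch of that flow as implemented by cik_get_ih_wptr()
 * and cik_irq_process() below (each IV entry is 16 bytes):
 *
 *     wptr = cik_get_ih_wptr(rdev);     // from register or writeback
 *     while (rptr != wptr) {
 *             // decode rdev->ih.ring[rptr / 4 .. rptr / 4 + 3]
 *             rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *     }
 *     // rptr is then written back so the GPU knows how far the
 *     // host has read.
 */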
6565
6566 /**
6567  * cik_enable_interrupts - Enable the interrupt ring buffer
6568  *
6569  * @rdev: radeon_device pointer
6570  *
6571  * Enable the interrupt ring buffer (CIK).
6572  */
6573 static void cik_enable_interrupts(struct radeon_device *rdev)
6574 {
6575         u32 ih_cntl = RREG32(IH_CNTL);
6576         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6577
6578         ih_cntl |= ENABLE_INTR;
6579         ih_rb_cntl |= IH_RB_ENABLE;
6580         WREG32(IH_CNTL, ih_cntl);
6581         WREG32(IH_RB_CNTL, ih_rb_cntl);
6582         rdev->ih.enabled = true;
6583 }
6584
6585 /**
6586  * cik_disable_interrupts - Disable the interrupt ring buffer
6587  *
6588  * @rdev: radeon_device pointer
6589  *
6590  * Disable the interrupt ring buffer (CIK).
6591  */
6592 static void cik_disable_interrupts(struct radeon_device *rdev)
6593 {
6594         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6595         u32 ih_cntl = RREG32(IH_CNTL);
6596
6597         ih_rb_cntl &= ~IH_RB_ENABLE;
6598         ih_cntl &= ~ENABLE_INTR;
6599         WREG32(IH_RB_CNTL, ih_rb_cntl);
6600         WREG32(IH_CNTL, ih_cntl);
6601         /* set rptr, wptr to 0 */
6602         WREG32(IH_RB_RPTR, 0);
6603         WREG32(IH_RB_WPTR, 0);
6604         rdev->ih.enabled = false;
6605         rdev->ih.rptr = 0;
6606 }
6607
6608 /**
6609  * cik_disable_interrupt_state - Disable all interrupt sources
6610  *
6611  * @rdev: radeon_device pointer
6612  *
6613  * Clear all interrupt enable bits used by the driver (CIK).
6614  */
6615 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6616 {
6617         u32 tmp;
6618
6619         /* gfx ring */
6620         tmp = RREG32(CP_INT_CNTL_RING0) &
6621                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6622         WREG32(CP_INT_CNTL_RING0, tmp);
6623         /* sdma */
6624         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6625         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6626         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6627         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6628         /* compute queues */
6629         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6630         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6631         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6632         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6633         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6634         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6635         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6636         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6637         /* grbm */
6638         WREG32(GRBM_INT_CNTL, 0);
6639         /* vline/vblank, etc. */
6640         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6641         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6642         if (rdev->num_crtc >= 4) {
6643                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6644                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6645         }
6646         if (rdev->num_crtc >= 6) {
6647                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6648                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6649         }
6650
6651         /* dac hotplug */
6652         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6653
6654         /* digital hotplug */
6655         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6656         WREG32(DC_HPD1_INT_CONTROL, tmp);
6657         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6658         WREG32(DC_HPD2_INT_CONTROL, tmp);
6659         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6660         WREG32(DC_HPD3_INT_CONTROL, tmp);
6661         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6662         WREG32(DC_HPD4_INT_CONTROL, tmp);
6663         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6664         WREG32(DC_HPD5_INT_CONTROL, tmp);
6665         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6666         WREG32(DC_HPD6_INT_CONTROL, tmp);
6667
6668 }
6669
6670 /**
6671  * cik_irq_init - init and enable the interrupt ring
6672  *
6673  * @rdev: radeon_device pointer
6674  *
6675  * Allocate a ring buffer for the interrupt controller,
6676  * enable the RLC, disable interrupts, enable the IH
6677  * ring buffer and enable it (CIK).
6678  * Called at device load and resume.
6679  * Returns 0 for success, errors for failure.
6680  */
6681 static int cik_irq_init(struct radeon_device *rdev)
6682 {
6683         int ret = 0;
6684         int rb_bufsz;
6685         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6686
6687         /* allocate ring */
6688         ret = r600_ih_ring_alloc(rdev);
6689         if (ret)
6690                 return ret;
6691
6692         /* disable irqs */
6693         cik_disable_interrupts(rdev);
6694
6695         /* init rlc */
6696         ret = cik_rlc_resume(rdev);
6697         if (ret) {
6698                 r600_ih_ring_fini(rdev);
6699                 return ret;
6700         }
6701
6702         /* setup interrupt control */
6703         /* XXX this should actually be a bus address, not an MC address. same on older asics */
6704         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6705         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6706         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6707          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6708          */
6709         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6710         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6711         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6712         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6713
6714         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6715         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6716
6717         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6718                       IH_WPTR_OVERFLOW_CLEAR |
6719                       (rb_bufsz << 1));
6720
6721         if (rdev->wb.enabled)
6722                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6723
6724         /* set the writeback address whether it's enabled or not */
6725         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6726         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6727
6728         WREG32(IH_RB_CNTL, ih_rb_cntl);
6729
6730         /* set rptr, wptr to 0 */
6731         WREG32(IH_RB_RPTR, 0);
6732         WREG32(IH_RB_WPTR, 0);
6733
6734         /* Default settings for IH_CNTL (disabled at first) */
6735         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6736         /* RPTR_REARM only works if msi's are enabled */
6737         if (rdev->msi_enabled)
6738                 ih_cntl |= RPTR_REARM;
6739         WREG32(IH_CNTL, ih_cntl);
6740
6741         /* force the active interrupt state to all disabled */
6742         cik_disable_interrupt_state(rdev);
6743
6744         pci_set_master(rdev->pdev);
6745
6746         /* enable irqs */
6747         cik_enable_interrupts(rdev);
6748
6749         return ret;
6750 }
6751
6752 /**
6753  * cik_irq_set - enable/disable interrupt sources
6754  *
6755  * @rdev: radeon_device pointer
6756  *
6757  * Enable interrupt sources on the GPU (vblanks, hpd,
6758  * etc.) (CIK).
6759  * Returns 0 for success, errors for failure.
6760  */
6761 int cik_irq_set(struct radeon_device *rdev)
6762 {
6763         u32 cp_int_cntl;
6764         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6765         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6766         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6767         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6768         u32 grbm_int_cntl = 0;
6769         u32 dma_cntl, dma_cntl1;
6770         u32 thermal_int;
6771
6772         if (!rdev->irq.installed) {
6773                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6774                 return -EINVAL;
6775         }
6776         /* don't enable anything if the ih is disabled */
6777         if (!rdev->ih.enabled) {
6778                 cik_disable_interrupts(rdev);
6779                 /* force the active interrupt state to all disabled */
6780                 cik_disable_interrupt_state(rdev);
6781                 return 0;
6782         }
6783
6784         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6785                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6786         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6787
6788         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6789         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6790         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6791         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6792         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6793         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6794
6795         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6796         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6797
6798         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6799         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6800         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6801         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6802         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6803         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6804         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6805         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6806
6807         if (rdev->flags & RADEON_IS_IGP)
6808                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6809                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6810         else
6811                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6812                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6813
6814         /* enable CP interrupts on all rings */
6815         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6816                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6817                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6818         }
6819         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6820                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6821                 DRM_DEBUG("cik_irq_set: sw int cp1\n");
6822                 if (ring->me == 1) {
6823                         switch (ring->pipe) {
6824                         case 0:
6825                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6826                                 break;
6827                         case 1:
6828                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6829                                 break;
6830                         case 2:
6831                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6832                                 break;
6833                         case 3:
6834                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6835                                 break;
6836                         default:
6837                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6838                                 break;
6839                         }
6840                 } else if (ring->me == 2) {
6841                         switch (ring->pipe) {
6842                         case 0:
6843                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6844                                 break;
6845                         case 1:
6846                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6847                                 break;
6848                         case 2:
6849                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6850                                 break;
6851                         case 3:
6852                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6853                                 break;
6854                         default:
6855                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6856                                 break;
6857                         }
6858                 } else {
6859                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6860                 }
6861         }
6862         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6863                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6864                 DRM_DEBUG("cik_irq_set: sw int cp2\n");
6865                 if (ring->me == 1) {
6866                         switch (ring->pipe) {
6867                         case 0:
6868                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6869                                 break;
6870                         case 1:
6871                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6872                                 break;
6873                         case 2:
6874                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6875                                 break;
6876                         case 3:
6877                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6878                                 break;
6879                         default:
6880                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6881                                 break;
6882                         }
6883                 } else if (ring->me == 2) {
6884                         switch (ring->pipe) {
6885                         case 0:
6886                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6887                                 break;
6888                         case 1:
6889                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6890                                 break;
6891                         case 2:
6892                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6893                                 break;
6894                         case 3:
6895                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6896                                 break;
6897                         default:
6898                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6899                                 break;
6900                         }
6901                 } else {
6902                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6903                 }
6904         }
6905
6906         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6907                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6908                 dma_cntl |= TRAP_ENABLE;
6909         }
6910
6911         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6912                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6913                 dma_cntl1 |= TRAP_ENABLE;
6914         }
6915
6916         if (rdev->irq.crtc_vblank_int[0] ||
6917             atomic_read(&rdev->irq.pflip[0])) {
6918                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6919                 crtc1 |= VBLANK_INTERRUPT_MASK;
6920         }
6921         if (rdev->irq.crtc_vblank_int[1] ||
6922             atomic_read(&rdev->irq.pflip[1])) {
6923                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6924                 crtc2 |= VBLANK_INTERRUPT_MASK;
6925         }
6926         if (rdev->irq.crtc_vblank_int[2] ||
6927             atomic_read(&rdev->irq.pflip[2])) {
6928                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6929                 crtc3 |= VBLANK_INTERRUPT_MASK;
6930         }
6931         if (rdev->irq.crtc_vblank_int[3] ||
6932             atomic_read(&rdev->irq.pflip[3])) {
6933                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6934                 crtc4 |= VBLANK_INTERRUPT_MASK;
6935         }
6936         if (rdev->irq.crtc_vblank_int[4] ||
6937             atomic_read(&rdev->irq.pflip[4])) {
6938                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6939                 crtc5 |= VBLANK_INTERRUPT_MASK;
6940         }
6941         if (rdev->irq.crtc_vblank_int[5] ||
6942             atomic_read(&rdev->irq.pflip[5])) {
6943                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6944                 crtc6 |= VBLANK_INTERRUPT_MASK;
6945         }
6946         if (rdev->irq.hpd[0]) {
6947                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6948                 hpd1 |= DC_HPDx_INT_EN;
6949         }
6950         if (rdev->irq.hpd[1]) {
6951                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6952                 hpd2 |= DC_HPDx_INT_EN;
6953         }
6954         if (rdev->irq.hpd[2]) {
6955                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6956                 hpd3 |= DC_HPDx_INT_EN;
6957         }
6958         if (rdev->irq.hpd[3]) {
6959                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6960                 hpd4 |= DC_HPDx_INT_EN;
6961         }
6962         if (rdev->irq.hpd[4]) {
6963                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6964                 hpd5 |= DC_HPDx_INT_EN;
6965         }
6966         if (rdev->irq.hpd[5]) {
6967                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6968                 hpd6 |= DC_HPDx_INT_EN;
6969         }
6970
6971         if (rdev->irq.dpm_thermal) {
6972                 DRM_DEBUG("dpm thermal\n");
6973                 if (rdev->flags & RADEON_IS_IGP)
6974                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6975                 else
6976                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6977         }
6978
6979         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6980
6981         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6982         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6983
6984         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6985         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6986         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6987         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6988         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6989         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6990         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6991         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6992
6993         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6994
6995         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6996         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6997         if (rdev->num_crtc >= 4) {
6998                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6999                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7000         }
7001         if (rdev->num_crtc >= 6) {
7002                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7003                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7004         }
7005
7006         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7007         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7008         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7009         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7010         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7011         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7012
7013         if (rdev->flags & RADEON_IS_IGP)
7014                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7015         else
7016                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7017
7018         return 0;
7019 }
7020
7021 /**
7022  * cik_irq_ack - ack interrupt sources
7023  *
7024  * @rdev: radeon_device pointer
7025  *
7026  * Ack interrupt sources on the GPU (vblanks, hpd,
7027  * etc.) (CIK).  Certain interrupt sources are sw
7028  * generated and do not require an explicit ack.
7029  */
7030 static inline void cik_irq_ack(struct radeon_device *rdev)
7031 {
7032         u32 tmp;
7033
7034         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7035         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7036         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7037         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7038         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7039         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7040         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7041
7042         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7043                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7044         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7045                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7046         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7047                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7048         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7049                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7050
7051         if (rdev->num_crtc >= 4) {
7052                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7053                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7054                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7055                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7056                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7057                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7058                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7059                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7060         }
7061
7062         if (rdev->num_crtc >= 6) {
7063                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7064                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7065                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7066                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7067                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7068                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7069                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7070                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7071         }
7072
7073         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7074                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7075                 tmp |= DC_HPDx_INT_ACK;
7076                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7077         }
7078         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7079                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7080                 tmp |= DC_HPDx_INT_ACK;
7081                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7082         }
7083         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7084                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7085                 tmp |= DC_HPDx_INT_ACK;
7086                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7087         }
7088         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7089                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7090                 tmp |= DC_HPDx_INT_ACK;
7091                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7092         }
7093         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7094                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7095                 tmp |= DC_HPDx_INT_ACK;
7096                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7097         }
7098         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7099                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7100                 tmp |= DC_HPDx_INT_ACK;
7101                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7102         }
7103 }
7104
7105 /**
7106  * cik_irq_disable - disable interrupts
7107  *
7108  * @rdev: radeon_device pointer
7109  *
7110  * Disable interrupts on the hw (CIK).
7111  */
7112 static void cik_irq_disable(struct radeon_device *rdev)
7113 {
7114         cik_disable_interrupts(rdev);
7115         /* Wait and acknowledge irq */
7116         mdelay(1);
7117         cik_irq_ack(rdev);
7118         cik_disable_interrupt_state(rdev);
7119 }
7120
7121 /**
7122  * cik_irq_suspend - disable interrupts for suspend
7123  *
7124  * @rdev: radeon_device pointer
7125  *
7126  * Disable interrupts and stop the RLC (CIK).
7127  * Used for suspend.
7128  */
7129 static void cik_irq_suspend(struct radeon_device *rdev)
7130 {
7131         cik_irq_disable(rdev);
7132         cik_rlc_stop(rdev);
7133 }
7134
7135 /**
7136  * cik_irq_fini - tear down interrupt support
7137  *
7138  * @rdev: radeon_device pointer
7139  *
7140  * Disable interrupts on the hw and free the IH ring
7141  * buffer (CIK).
7142  * Used for driver unload.
7143  */
7144 static void cik_irq_fini(struct radeon_device *rdev)
7145 {
7146         cik_irq_suspend(rdev);
7147         r600_ih_ring_fini(rdev);
7148 }
7149
7150 /**
7151  * cik_get_ih_wptr - get the IH ring buffer wptr
7152  *
7153  * @rdev: radeon_device pointer
7154  *
7155  * Get the IH ring buffer wptr from either the register
7156  * or the writeback memory buffer (CIK).  Also check for
7157  * ring buffer overflow and deal with it.
7158  * Used by cik_irq_process().
7159  * Returns the value of the wptr.
7160  */
7161 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7162 {
7163         u32 wptr, tmp;
7164
7165         if (rdev->wb.enabled)
7166                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7167         else
7168                 wptr = RREG32(IH_RB_WPTR);
7169
7170         if (wptr & RB_OVERFLOW) {
7171                 /* When a ring buffer overflow happens, start parsing interrupts
7172                  * from the last vector that was not overwritten (wptr + 16).
7173                  * Hopefully this allows us to catch up.
7174                  */
7175                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7176                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7177                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7178                 tmp = RREG32(IH_RB_CNTL);
7179                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7180                 WREG32(IH_RB_CNTL, tmp);
7181         }
7182         return (wptr & rdev->ih.ptr_mask);
7183 }
7184
7185 /* CIK IV Ring
7186  * Each IV ring entry is 128 bits:
7187  * [7:0]    - interrupt source id
7188  * [31:8]   - reserved
7189  * [59:32]  - interrupt source data
7190  * [63:60]  - reserved
7191  * [71:64]  - RINGID
7192  *            CP:
7193  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7194  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7195  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7196  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7197  *            PIPE_ID - ME0 0=3D
7198  *                    - ME1&2 compute dispatcher (4 pipes each)
7199  *            SDMA:
7200  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7201  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7202  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7203  * [79:72]  - VMID
7204  * [95:80]  - PASID
7205  * [127:96] - reserved
7206  */
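/* As an example of the layout above, cik_irq_process() below pulls
 * the fields it needs out of each 4-dword entry (after le32_to_cpu)
 * roughly as:
 *
 *     src_id   = ring[ring_index]     & 0xff;       // bits [7:0]
 *     src_data = ring[ring_index + 1] & 0xfffffff;  // bits [59:32]
 *     ring_id  = ring[ring_index + 2] & 0xff;       // bits [71:64]
 *
 * and, for CP/SDMA sources, splits ring_id into ME/PIPE/QUEUE ids,
 * e.g. me_id = (ring_id & 0x60) >> 5, pipe_id = (ring_id & 0x18) >> 3,
 * queue_id = ring_id & 0x7 (the exact bitfield order is still marked
 * XXX in the handler).
 */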
7207 /**
7208  * cik_irq_process - interrupt handler
7209  *
7210  * @rdev: radeon_device pointer
7211  *
7212  * Interrupt handler (CIK).  Walk the IH ring,
7213  * ack interrupts and schedule work to handle
7214  * interrupt events.
7215  * Returns irq process return code.
7216  */
7217 int cik_irq_process(struct radeon_device *rdev)
7218 {
7219         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7220         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7221         u32 wptr;
7222         u32 rptr;
7223         u32 src_id, src_data, ring_id;
7224         u8 me_id, pipe_id, queue_id;
7225         u32 ring_index;
7226         bool queue_hotplug = false;
7227         bool queue_reset = false;
7228         u32 addr, status, mc_client;
7229         bool queue_thermal = false;
7230
7231         if (!rdev->ih.enabled || rdev->shutdown)
7232                 return IRQ_NONE;
7233
7234         wptr = cik_get_ih_wptr(rdev);
7235
7236 restart_ih:
7237         /* is somebody else already processing irqs? */
7238         if (atomic_xchg(&rdev->ih.lock, 1))
7239                 return IRQ_NONE;
7240
7241         rptr = rdev->ih.rptr;
7242         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7243
7244         /* Order reading of wptr vs. reading of IH ring data */
7245         rmb();
7246
7247         /* display interrupts */
7248         cik_irq_ack(rdev);
7249
7250         while (rptr != wptr) {
7251                 /* wptr/rptr are in bytes! */
7252                 ring_index = rptr / 4;
7253                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7254                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7255                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7256
7257                 switch (src_id) {
7258                 case 1: /* D1 vblank/vline */
7259                         switch (src_data) {
7260                         case 0: /* D1 vblank */
7261                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7262                                         if (rdev->irq.crtc_vblank_int[0]) {
7263                                                 drm_handle_vblank(rdev->ddev, 0);
7264                                                 rdev->pm.vblank_sync = true;
7265                                                 wake_up(&rdev->irq.vblank_queue);
7266                                         }
7267                                         if (atomic_read(&rdev->irq.pflip[0]))
7268                                                 radeon_crtc_handle_flip(rdev, 0);
7269                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7270                                         DRM_DEBUG("IH: D1 vblank\n");
7271                                 }
7272                                 break;
7273                         case 1: /* D1 vline */
7274                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7275                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7276                                         DRM_DEBUG("IH: D1 vline\n");
7277                                 }
7278                                 break;
7279                         default:
7280                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7281                                 break;
7282                         }
7283                         break;
7284                 case 2: /* D2 vblank/vline */
7285                         switch (src_data) {
7286                         case 0: /* D2 vblank */
7287                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7288                                         if (rdev->irq.crtc_vblank_int[1]) {
7289                                                 drm_handle_vblank(rdev->ddev, 1);
7290                                                 rdev->pm.vblank_sync = true;
7291                                                 wake_up(&rdev->irq.vblank_queue);
7292                                         }
7293                                         if (atomic_read(&rdev->irq.pflip[1]))
7294                                                 radeon_crtc_handle_flip(rdev, 1);
7295                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7296                                         DRM_DEBUG("IH: D2 vblank\n");
7297                                 }
7298                                 break;
7299                         case 1: /* D2 vline */
7300                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7301                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7302                                         DRM_DEBUG("IH: D2 vline\n");
7303                                 }
7304                                 break;
7305                         default:
7306                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7307                                 break;
7308                         }
7309                         break;
7310                 case 3: /* D3 vblank/vline */
7311                         switch (src_data) {
7312                         case 0: /* D3 vblank */
7313                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7314                                         if (rdev->irq.crtc_vblank_int[2]) {
7315                                                 drm_handle_vblank(rdev->ddev, 2);
7316                                                 rdev->pm.vblank_sync = true;
7317                                                 wake_up(&rdev->irq.vblank_queue);
7318                                         }
7319                                         if (atomic_read(&rdev->irq.pflip[2]))
7320                                                 radeon_crtc_handle_flip(rdev, 2);
7321                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7322                                         DRM_DEBUG("IH: D3 vblank\n");
7323                                 }
7324                                 break;
7325                         case 1: /* D3 vline */
7326                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7327                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7328                                         DRM_DEBUG("IH: D3 vline\n");
7329                                 }
7330                                 break;
7331                         default:
7332                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7333                                 break;
7334                         }
7335                         break;
7336                 case 4: /* D4 vblank/vline */
7337                         switch (src_data) {
7338                         case 0: /* D4 vblank */
7339                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7340                                         if (rdev->irq.crtc_vblank_int[3]) {
7341                                                 drm_handle_vblank(rdev->ddev, 3);
7342                                                 rdev->pm.vblank_sync = true;
7343                                                 wake_up(&rdev->irq.vblank_queue);
7344                                         }
7345                                         if (atomic_read(&rdev->irq.pflip[3]))
7346                                                 radeon_crtc_handle_flip(rdev, 3);
7347                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7348                                         DRM_DEBUG("IH: D4 vblank\n");
7349                                 }
7350                                 break;
7351                         case 1: /* D4 vline */
7352                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7353                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7354                                         DRM_DEBUG("IH: D4 vline\n");
7355                                 }
7356                                 break;
7357                         default:
7358                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7359                                 break;
7360                         }
7361                         break;
7362                 case 5: /* D5 vblank/vline */
7363                         switch (src_data) {
7364                         case 0: /* D5 vblank */
7365                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7366                                         if (rdev->irq.crtc_vblank_int[4]) {
7367                                                 drm_handle_vblank(rdev->ddev, 4);
7368                                                 rdev->pm.vblank_sync = true;
7369                                                 wake_up(&rdev->irq.vblank_queue);
7370                                         }
7371                                         if (atomic_read(&rdev->irq.pflip[4]))
7372                                                 radeon_crtc_handle_flip(rdev, 4);
7373                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7374                                         DRM_DEBUG("IH: D5 vblank\n");
7375                                 }
7376                                 break;
7377                         case 1: /* D5 vline */
7378                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7379                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7380                                         DRM_DEBUG("IH: D5 vline\n");
7381                                 }
7382                                 break;
7383                         default:
7384                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7385                                 break;
7386                         }
7387                         break;
7388                 case 6: /* D6 vblank/vline */
7389                         switch (src_data) {
7390                         case 0: /* D6 vblank */
7391                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7392                                         if (rdev->irq.crtc_vblank_int[5]) {
7393                                                 drm_handle_vblank(rdev->ddev, 5);
7394                                                 rdev->pm.vblank_sync = true;
7395                                                 wake_up(&rdev->irq.vblank_queue);
7396                                         }
7397                                         if (atomic_read(&rdev->irq.pflip[5]))
7398                                                 radeon_crtc_handle_flip(rdev, 5);
7399                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7400                                         DRM_DEBUG("IH: D6 vblank\n");
7401                                 }
7402                                 break;
7403                         case 1: /* D6 vline */
7404                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7405                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7406                                         DRM_DEBUG("IH: D6 vline\n");
7407                                 }
7408                                 break;
7409                         default:
7410                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7411                                 break;
7412                         }
7413                         break;
7414                 case 42: /* HPD hotplug */
7415                         switch (src_data) {
7416                         case 0:
7417                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7418                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7419                                         queue_hotplug = true;
7420                                         DRM_DEBUG("IH: HPD1\n");
7421                                 }
7422                                 break;
7423                         case 1:
7424                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7425                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7426                                         queue_hotplug = true;
7427                                         DRM_DEBUG("IH: HPD2\n");
7428                                 }
7429                                 break;
7430                         case 2:
7431                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7432                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7433                                         queue_hotplug = true;
7434                                         DRM_DEBUG("IH: HPD3\n");
7435                                 }
7436                                 break;
7437                         case 3:
7438                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7439                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7440                                         queue_hotplug = true;
7441                                         DRM_DEBUG("IH: HPD4\n");
7442                                 }
7443                                 break;
7444                         case 4:
7445                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7446                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7447                                         queue_hotplug = true;
7448                                         DRM_DEBUG("IH: HPD5\n");
7449                                 }
7450                                 break;
7451                         case 5:
7452                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7453                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7454                                         queue_hotplug = true;
7455                                         DRM_DEBUG("IH: HPD6\n");
7456                                 }
7457                                 break;
7458                         default:
7459                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7460                                 break;
7461                         }
7462                         break;
7463                 case 124: /* UVD */
7464                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7465                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7466                         break;
7467                 case 146:
7468                 case 147:
7469                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7470                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7471                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7472                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7473                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7474                                 addr);
7475                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7476                                 status);
7477                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7478                         /* reset addr and status */
7479                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7480                         break;
7481                 case 176: /* GFX RB CP_INT */
7482                 case 177: /* GFX IB CP_INT */
7483                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7484                         break;
7485                 case 181: /* CP EOP event */
7486                         DRM_DEBUG("IH: CP EOP\n");
7487                         /* XXX check the bitfield order! */
7488                         me_id = (ring_id & 0x60) >> 5;
7489                         pipe_id = (ring_id & 0x18) >> 3;
7490                         queue_id = (ring_id & 0x7) >> 0;
7491                         switch (me_id) {
7492                         case 0:
7493                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7494                                 break;
7495                         case 1:
7496                         case 2:
7497                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7498                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7499                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7500                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7501                                 break;
7502                         }
7503                         break;
7504                 case 184: /* CP Privileged reg access */
7505                         DRM_ERROR("Illegal register access in command stream\n");
7506                         /* XXX check the bitfield order! */
7507                         me_id = (ring_id & 0x60) >> 5;
7508                         pipe_id = (ring_id & 0x18) >> 3;
7509                         queue_id = (ring_id & 0x7) >> 0;
7510                         switch (me_id) {
7511                         case 0:
7512                                 /* This results in a full GPU reset, but all we need to do is soft
7513                                  * reset the CP for gfx
7514                                  */
7515                                 queue_reset = true;
7516                                 break;
7517                         case 1:
7518                                 /* XXX compute */
7519                                 queue_reset = true;
7520                                 break;
7521                         case 2:
7522                                 /* XXX compute */
7523                                 queue_reset = true;
7524                                 break;
7525                         }
7526                         break;
7527                 case 185: /* CP Privileged inst */
7528                         DRM_ERROR("Illegal instruction in command stream\n");
7529                         /* XXX check the bitfield order! */
7530                         me_id = (ring_id & 0x60) >> 5;
7531                         pipe_id = (ring_id & 0x18) >> 3;
7532                         queue_id = (ring_id & 0x7) >> 0;
7533                         switch (me_id) {
7534                         case 0:
7535                                 /* This results in a full GPU reset, but all we need to do is soft
7536                                  * reset the CP for gfx
7537                                  */
7538                                 queue_reset = true;
7539                                 break;
7540                         case 1:
7541                                 /* XXX compute */
7542                                 queue_reset = true;
7543                                 break;
7544                         case 2:
7545                                 /* XXX compute */
7546                                 queue_reset = true;
7547                                 break;
7548                         }
7549                         break;
7550                 case 224: /* SDMA trap event */
7551                         /* XXX check the bitfield order! */
7552                         me_id = (ring_id & 0x3) >> 0;
7553                         queue_id = (ring_id & 0xc) >> 2;
7554                         DRM_DEBUG("IH: SDMA trap\n");
7555                         switch (me_id) {
7556                         case 0:
7557                                 switch (queue_id) {
7558                                 case 0:
7559                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7560                                         break;
7561                                 case 1:
7562                                         /* XXX compute */
7563                                         break;
7564                                 case 2:
7565                                         /* XXX compute */
7566                                         break;
7567                                 }
7568                                 break;
7569                         case 1:
7570                                 switch (queue_id) {
7571                                 case 0:
7572                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7573                                         break;
7574                                 case 1:
7575                                         /* XXX compute */
7576                                         break;
7577                                 case 2:
7578                                         /* XXX compute */
7579                                         break;
7580                                 }
7581                                 break;
7582                         }
7583                         break;
7584                 case 230: /* thermal low to high */
7585                         DRM_DEBUG("IH: thermal low to high\n");
7586                         rdev->pm.dpm.thermal.high_to_low = false;
7587                         queue_thermal = true;
7588                         break;
7589                 case 231: /* thermal high to low */
7590                         DRM_DEBUG("IH: thermal high to low\n");
7591                         rdev->pm.dpm.thermal.high_to_low = true;
7592                         queue_thermal = true;
7593                         break;
7594                 case 233: /* GUI IDLE */
7595                         DRM_DEBUG("IH: GUI idle\n");
7596                         break;
7597                 case 241: /* SDMA Privileged inst */
7598                 case 247: /* SDMA Privileged inst */
7599                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7600                         /* XXX check the bitfield order! */
7601                         me_id = (ring_id & 0x3) >> 0;
7602                         queue_id = (ring_id & 0xc) >> 2;
7603                         switch (me_id) {
7604                         case 0:
7605                                 switch (queue_id) {
7606                                 case 0:
7607                                         queue_reset = true;
7608                                         break;
7609                                 case 1:
7610                                         /* XXX compute */
7611                                         queue_reset = true;
7612                                         break;
7613                                 case 2:
7614                                         /* XXX compute */
7615                                         queue_reset = true;
7616                                         break;
7617                                 }
7618                                 break;
7619                         case 1:
7620                                 switch (queue_id) {
7621                                 case 0:
7622                                         queue_reset = true;
7623                                         break;
7624                                 case 1:
7625                                         /* XXX compute */
7626                                         queue_reset = true;
7627                                         break;
7628                                 case 2:
7629                                         /* XXX compute */
7630                                         queue_reset = true;
7631                                         break;
7632                                 }
7633                                 break;
7634                         }
7635                         break;
7636                 default:
7637                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7638                         break;
7639                 }
7640
7641                 /* wptr/rptr are in bytes! */
7642                 rptr += 16;
7643                 rptr &= rdev->ih.ptr_mask;
7644         }
7645         if (queue_hotplug)
7646                 schedule_work(&rdev->hotplug_work);
7647         if (queue_reset)
7648                 schedule_work(&rdev->reset_work);
7649         if (queue_thermal)
7650                 schedule_work(&rdev->pm.dpm.thermal.work);
7651         rdev->ih.rptr = rptr;
7652         WREG32(IH_RB_RPTR, rdev->ih.rptr);
7653         atomic_set(&rdev->ih.lock, 0);
7654
7655         /* make sure wptr hasn't changed while processing */
7656         wptr = cik_get_ih_wptr(rdev);
7657         if (wptr != rptr)
7658                 goto restart_ih;
7659
7660         return IRQ_HANDLED;
7661 }
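
/* Worked example of the ring_id decode used by the CP and SDMA cases above
 * (illustrative only; the bitfield order is still marked XXX in the code).
 * For a CP EOP entry with ring_id = 0x28:
 *
 *   me_id    = (0x28 & 0x60) >> 5 = 1   first compute ME (MEC)
 *   pipe_id  = (0x28 & 0x18) >> 3 = 1   second pipe
 *   queue_id = (0x28 & 0x07) >> 0 = 0   first queue
 *
 * Each IH ring entry is four dwords (16 bytes), which is why rptr is
 * advanced by 16 at the bottom of the processing loop.
 */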
7662
7663 /*
7664  * startup/shutdown callbacks
7665  */
7666 /**
7667  * cik_startup - program the asic to a functional state
7668  *
7669  * @rdev: radeon_device pointer
7670  *
7671  * Programs the asic to a functional state (CIK).
7672  * Called by cik_init() and cik_resume().
7673  * Returns 0 for success, error for failure.
7674  */
7675 static int cik_startup(struct radeon_device *rdev)
7676 {
7677         struct radeon_ring *ring;
7678         int r;
7679
7680         /* enable pcie gen2/3 link */
7681         cik_pcie_gen3_enable(rdev);
7682         /* enable aspm */
7683         cik_program_aspm(rdev);
7684
7685         /* scratch needs to be initialized before MC */
7686         r = r600_vram_scratch_init(rdev);
7687         if (r)
7688                 return r;
7689
7690         cik_mc_program(rdev);
7691
7692         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7693                 r = ci_mc_load_microcode(rdev);
7694                 if (r) {
7695                         DRM_ERROR("Failed to load MC firmware!\n");
7696                         return r;
7697                 }
7698         }
7699
7700         r = cik_pcie_gart_enable(rdev);
7701         if (r)
7702                 return r;
7703         cik_gpu_init(rdev);
7704
7705         /* allocate rlc buffers */
7706         if (rdev->flags & RADEON_IS_IGP) {
7707                 if (rdev->family == CHIP_KAVERI) {
7708                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7709                         rdev->rlc.reg_list_size =
7710                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7711                 } else {
7712                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7713                         rdev->rlc.reg_list_size =
7714                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7715                 }
7716         }
7717         rdev->rlc.cs_data = ci_cs_data;
7718         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7719         r = sumo_rlc_init(rdev);
7720         if (r) {
7721                 DRM_ERROR("Failed to init rlc BOs!\n");
7722                 return r;
7723         }
7724
7725         /* allocate wb buffer */
7726         r = radeon_wb_init(rdev);
7727         if (r)
7728                 return r;
7729
7730         /* allocate mec buffers */
7731         r = cik_mec_init(rdev);
7732         if (r) {
7733                 DRM_ERROR("Failed to init MEC BOs!\n");
7734                 return r;
7735         }
7736
7737         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7738         if (r) {
7739                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7740                 return r;
7741         }
7742
7743         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7744         if (r) {
7745                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7746                 return r;
7747         }
7748
7749         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7750         if (r) {
7751                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7752                 return r;
7753         }
7754
7755         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7756         if (r) {
7757                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7758                 return r;
7759         }
7760
7761         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7762         if (r) {
7763                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7764                 return r;
7765         }
7766
7767         r = radeon_uvd_resume(rdev);
7768         if (!r) {
7769                 r = uvd_v4_2_resume(rdev);
7770                 if (!r) {
7771                         r = radeon_fence_driver_start_ring(rdev,
7772                                                            R600_RING_TYPE_UVD_INDEX);
7773                         if (r)
7774                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7775                 }
7776         }
7777         if (r)
7778                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7779
7780         /* Enable IRQ */
7781         if (!rdev->irq.installed) {
7782                 r = radeon_irq_kms_init(rdev);
7783                 if (r)
7784                         return r;
7785         }
7786
7787         r = cik_irq_init(rdev);
7788         if (r) {
7789                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7790                 radeon_irq_kms_fini(rdev);
7791                 return r;
7792         }
7793         cik_irq_set(rdev);
7794
7795         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7796         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7797                              PACKET3(PACKET3_NOP, 0x3FFF));
7798         if (r)
7799                 return r;
7800
7801         /* set up the compute queues */
7802         /* type-2 packets are deprecated on MEC, use type-3 instead */
7803         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7804         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7805                              PACKET3(PACKET3_NOP, 0x3FFF));
7806         if (r)
7807                 return r;
7808         ring->me = 1; /* first MEC */
7809         ring->pipe = 0; /* first pipe */
7810         ring->queue = 0; /* first queue */
7811         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7812
7813         /* type-2 packets are deprecated on MEC, use type-3 instead */
7814         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7815         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7816                              PACKET3(PACKET3_NOP, 0x3FFF));
7817         if (r)
7818                 return r;
7819         /* dGPUs only have 1 MEC */
7820         ring->me = 1; /* first MEC */
7821         ring->pipe = 0; /* first pipe */
7822         ring->queue = 1; /* second queue */
7823         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7824
7825         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7826         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7827                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7828         if (r)
7829                 return r;
7830
7831         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7832         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7833                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7834         if (r)
7835                 return r;
7836
7837         r = cik_cp_resume(rdev);
7838         if (r)
7839                 return r;
7840
7841         r = cik_sdma_resume(rdev);
7842         if (r)
7843                 return r;
7844
7845         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7846         if (ring->ring_size) {
7847                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7848                                      RADEON_CP_PACKET2);
7849                 if (!r)
7850                         r = uvd_v1_0_init(rdev);
7851                 if (r)
7852                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7853         }
7854
7855         r = radeon_ib_pool_init(rdev);
7856         if (r) {
7857                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7858                 return r;
7859         }
7860
7861         r = radeon_vm_manager_init(rdev);
7862         if (r) {
7863                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7864                 return r;
7865         }
7866
7867         r = dce6_audio_init(rdev);
7868         if (r)
7869                 return r;
7870
7871         return 0;
7872 }
7873
7874 /**
7875  * cik_resume - resume the asic to a functional state
7876  *
7877  * @rdev: radeon_device pointer
7878  *
7879  * Programs the asic to a functional state (CIK).
7880  * Called at resume.
7881  * Returns 0 for success, error for failure.
7882  */
7883 int cik_resume(struct radeon_device *rdev)
7884 {
7885         int r;
7886
7887         /* post card */
7888         atom_asic_init(rdev->mode_info.atom_context);
7889
7890         /* init golden registers */
7891         cik_init_golden_registers(rdev);
7892
7893         radeon_pm_resume(rdev);
7894
7895         rdev->accel_working = true;
7896         r = cik_startup(rdev);
7897         if (r) {
7898                 DRM_ERROR("cik startup failed on resume\n");
7899                 rdev->accel_working = false;
7900                 return r;
7901         }
7902
7903         return r;
7904
7905 }
7906
7907 /**
7908  * cik_suspend - suspend the asic
7909  *
7910  * @rdev: radeon_device pointer
7911  *
7912  * Bring the chip into a state suitable for suspend (CIK).
7913  * Called at suspend.
7914  * Returns 0 for success.
7915  */
7916 int cik_suspend(struct radeon_device *rdev)
7917 {
7918         radeon_pm_suspend(rdev);
7919         dce6_audio_fini(rdev);
7920         radeon_vm_manager_fini(rdev);
7921         cik_cp_enable(rdev, false);
7922         cik_sdma_enable(rdev, false);
7923         uvd_v1_0_fini(rdev);
7924         radeon_uvd_suspend(rdev);
7925         cik_fini_pg(rdev);
7926         cik_fini_cg(rdev);
7927         cik_irq_suspend(rdev);
7928         radeon_wb_disable(rdev);
7929         cik_pcie_gart_disable(rdev);
7930         return 0;
7931 }
7932
7933 /* The plan is to move initialization into this function and use
7934  * helper functions so that radeon_device_init does little more
7935  * than call asic-specific functions. This should also allow
7936  * us to remove a bunch of callback functions,
7937  * like vram_info.
7938  */
7939 /**
7940  * cik_init - asic specific driver and hw init
7941  *
7942  * @rdev: radeon_device pointer
7943  *
7944  * Setup asic specific driver variables and program the hw
7945  * to a functional state (CIK).
7946  * Called at driver startup.
7947  * Returns 0 for success, errors for failure.
7948  */
7949 int cik_init(struct radeon_device *rdev)
7950 {
7951         struct radeon_ring *ring;
7952         int r;
7953
7954         /* Read BIOS */
7955         if (!radeon_get_bios(rdev)) {
7956                 if (ASIC_IS_AVIVO(rdev))
7957                         return -EINVAL;
7958         }
7959         /* Must be an ATOMBIOS */
7960         if (!rdev->is_atom_bios) {
7961                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7962                 return -EINVAL;
7963         }
7964         r = radeon_atombios_init(rdev);
7965         if (r)
7966                 return r;
7967
7968         /* Post card if necessary */
7969         if (!radeon_card_posted(rdev)) {
7970                 if (!rdev->bios) {
7971                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7972                         return -EINVAL;
7973                 }
7974                 DRM_INFO("GPU not posted. posting now...\n");
7975                 atom_asic_init(rdev->mode_info.atom_context);
7976         }
7977         /* init golden registers */
7978         cik_init_golden_registers(rdev);
7979         /* Initialize scratch registers */
7980         cik_scratch_init(rdev);
7981         /* Initialize surface registers */
7982         radeon_surface_init(rdev);
7983         /* Initialize clocks */
7984         radeon_get_clock_info(rdev->ddev);
7985
7986         /* Fence driver */
7987         r = radeon_fence_driver_init(rdev);
7988         if (r)
7989                 return r;
7990
7991         /* initialize memory controller */
7992         r = cik_mc_init(rdev);
7993         if (r)
7994                 return r;
7995         /* Memory manager */
7996         r = radeon_bo_init(rdev);
7997         if (r)
7998                 return r;
7999
8000         if (rdev->flags & RADEON_IS_IGP) {
8001                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8002                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8003                         r = cik_init_microcode(rdev);
8004                         if (r) {
8005                                 DRM_ERROR("Failed to load firmware!\n");
8006                                 return r;
8007                         }
8008                 }
8009         } else {
8010                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8011                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8012                     !rdev->mc_fw) {
8013                         r = cik_init_microcode(rdev);
8014                         if (r) {
8015                                 DRM_ERROR("Failed to load firmware!\n");
8016                                 return r;
8017                         }
8018                 }
8019         }
8020
8021         /* Initialize power management */
8022         radeon_pm_init(rdev);
8023
8024         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8025         ring->ring_obj = NULL;
8026         r600_ring_init(rdev, ring, 1024 * 1024);
8027
8028         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8029         ring->ring_obj = NULL;
8030         r600_ring_init(rdev, ring, 1024 * 1024);
8031         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8032         if (r)
8033                 return r;
8034
8035         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8036         ring->ring_obj = NULL;
8037         r600_ring_init(rdev, ring, 1024 * 1024);
8038         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8039         if (r)
8040                 return r;
8041
8042         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8043         ring->ring_obj = NULL;
8044         r600_ring_init(rdev, ring, 256 * 1024);
8045
8046         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8047         ring->ring_obj = NULL;
8048         r600_ring_init(rdev, ring, 256 * 1024);
8049
8050         r = radeon_uvd_init(rdev);
8051         if (!r) {
8052                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8053                 ring->ring_obj = NULL;
8054                 r600_ring_init(rdev, ring, 4096);
8055         }
8056
8057         rdev->ih.ring_obj = NULL;
8058         r600_ih_ring_init(rdev, 64 * 1024);
8059
8060         r = r600_pcie_gart_init(rdev);
8061         if (r)
8062                 return r;
8063
8064         rdev->accel_working = true;
8065         r = cik_startup(rdev);
8066         if (r) {
8067                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8068                 cik_cp_fini(rdev);
8069                 cik_sdma_fini(rdev);
8070                 cik_irq_fini(rdev);
8071                 sumo_rlc_fini(rdev);
8072                 cik_mec_fini(rdev);
8073                 radeon_wb_fini(rdev);
8074                 radeon_ib_pool_fini(rdev);
8075                 radeon_vm_manager_fini(rdev);
8076                 radeon_irq_kms_fini(rdev);
8077                 cik_pcie_gart_fini(rdev);
8078                 rdev->accel_working = false;
8079         }
8080
8081         /* Don't start up if the MC ucode is missing.
8082          * The default clocks and voltages before the MC ucode
8083          * is loaded are not sufficient for advanced operations.
8084          */
8085         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8086                 DRM_ERROR("radeon: MC ucode required for CIK+.\n");
8087                 return -EINVAL;
8088         }
8089
8090         return 0;
8091 }
8092
8093 /**
8094  * cik_fini - asic specific driver and hw fini
8095  *
8096  * @rdev: radeon_device pointer
8097  *
8098  * Tear down the asic specific driver variables and program the hw
8099  * to an idle state (CIK).
8100  * Called at driver unload.
8101  */
8102 void cik_fini(struct radeon_device *rdev)
8103 {
8104         radeon_pm_fini(rdev);
8105         cik_cp_fini(rdev);
8106         cik_sdma_fini(rdev);
8107         cik_fini_pg(rdev);
8108         cik_fini_cg(rdev);
8109         cik_irq_fini(rdev);
8110         sumo_rlc_fini(rdev);
8111         cik_mec_fini(rdev);
8112         radeon_wb_fini(rdev);
8113         radeon_vm_manager_fini(rdev);
8114         radeon_ib_pool_fini(rdev);
8115         radeon_irq_kms_fini(rdev);
8116         uvd_v1_0_fini(rdev);
8117         radeon_uvd_fini(rdev);
8118         cik_pcie_gart_fini(rdev);
8119         r600_vram_scratch_fini(rdev);
8120         radeon_gem_fini(rdev);
8121         radeon_fence_driver_fini(rdev);
8122         radeon_bo_fini(rdev);
8123         radeon_atombios_fini(rdev);
8124         kfree(rdev->bios);
8125         rdev->bios = NULL;
8126 }
8127
8128 void dce8_program_fmt(struct drm_encoder *encoder)
8129 {
8130         struct drm_device *dev = encoder->dev;
8131         struct radeon_device *rdev = dev->dev_private;
8132         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8133         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8134         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8135         int bpc = 0;
8136         u32 tmp = 0;
8137         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8138
8139         if (connector) {
8140                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8141                 bpc = radeon_get_monitor_bpc(connector);
8142                 dither = radeon_connector->dither;
8143         }
8144
8145         /* LVDS/eDP FMT is set up by atom */
8146         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8147                 return;
8148
8149         /* not needed for analog */
8150         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8151             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8152                 return;
8153
8154         if (bpc == 0)
8155                 return;
8156
8157         switch (bpc) {
8158         case 6:
8159                 if (dither == RADEON_FMT_DITHER_ENABLE)
8160                         /* XXX sort out optimal dither settings */
8161                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8162                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8163                 else
8164                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8165                 break;
8166         case 8:
8167                 if (dither == RADEON_FMT_DITHER_ENABLE)
8168                         /* XXX sort out optimal dither settings */
8169                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8170                                 FMT_RGB_RANDOM_ENABLE |
8171                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8172                 else
8173                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8174                 break;
8175         case 10:
8176                 if (dither == RADEON_FMT_DITHER_ENABLE)
8177                         /* XXX sort out optimal dither settings */
8178                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8179                                 FMT_RGB_RANDOM_ENABLE |
8180                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8181                 else
8182                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8183                 break;
8184         default:
8185                 /* not needed */
8186                 break;
8187         }
8188
8189         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8190 }
8191
8192 /* display watermark setup */
8193 /**
8194  * dce8_line_buffer_adjust - Set up the line buffer
8195  *
8196  * @rdev: radeon_device pointer
8197  * @radeon_crtc: the selected display controller
8198  * @mode: the current display mode on the selected display
8199  * controller
8200  *
8201  * Set up the line buffer allocation for
8202  * the selected display controller (CIK).
8203  * Returns the line buffer size in pixels.
8204  */
8205 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8206                                    struct radeon_crtc *radeon_crtc,
8207                                    struct drm_display_mode *mode)
8208 {
8209         u32 tmp, buffer_alloc, i;
8210         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8211         /*
8212          * Line Buffer Setup
8213          * There are 6 line buffers, one for each display controller.
8214          * There are 3 partitions per LB. Select the number of partitions
8215          * to enable based on the display width.  For display widths larger
8216          * than 4096, you need to use 2 display controllers and combine
8217          * them using the stereo blender.
8218          */
8219         if (radeon_crtc->base.enabled && mode) {
8220                 if (mode->crtc_hdisplay < 1920) {
8221                         tmp = 1;
8222                         buffer_alloc = 2;
8223                 } else if (mode->crtc_hdisplay < 2560) {
8224                         tmp = 2;
8225                         buffer_alloc = 2;
8226                 } else if (mode->crtc_hdisplay < 4096) {
8227                         tmp = 0;
8228                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8229                 } else {
8230                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8231                         tmp = 0;
8232                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8233                 }
8234         } else {
8235                 tmp = 1;
8236                 buffer_alloc = 0;
8237         }
8238
8239         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8240                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8241
8242         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8243                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8244         for (i = 0; i < rdev->usec_timeout; i++) {
8245                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8246                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8247                         break;
8248                 udelay(1);
8249         }
8250
8251         if (radeon_crtc->base.enabled && mode) {
8252                 switch (tmp) {
8253                 case 0:
8254                 default:
8255                         return 4096 * 2;
8256                 case 1:
8257                         return 1920 * 2;
8258                 case 2:
8259                         return 2560 * 2;
8260                 }
8261         }
8262
8263         /* controller not enabled, so no lb used */
8264         return 0;
8265 }
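
/* Example (illustrative only): for a 1366x768 mode, crtc_hdisplay < 1920, so
 * the controller gets LB_MEMORY_CONFIG(1) with 2 DMIF buffers and the
 * function returns 1920 * 2 = 3840 pixels of line buffer for that pipe.
 */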
8266
8267 /**
8268  * cik_get_number_of_dram_channels - get the number of dram channels
8269  *
8270  * @rdev: radeon_device pointer
8271  *
8272  * Look up the number of video ram channels (CIK).
8273  * Used for display watermark bandwidth calculations
8274  * Returns the number of dram channels
8275  */
8276 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8277 {
8278         u32 tmp = RREG32(MC_SHARED_CHMAP);
8279
8280         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8281         case 0:
8282         default:
8283                 return 1;
8284         case 1:
8285                 return 2;
8286         case 2:
8287                 return 4;
8288         case 3:
8289                 return 8;
8290         case 4:
8291                 return 3;
8292         case 5:
8293                 return 6;
8294         case 6:
8295                 return 10;
8296         case 7:
8297                 return 12;
8298         case 8:
8299                 return 16;
8300         }
8301 }
8302
8303 struct dce8_wm_params {
8304         u32 dram_channels; /* number of dram channels */
8305         u32 yclk;          /* bandwidth per dram data pin in kHz */
8306         u32 sclk;          /* engine clock in kHz */
8307         u32 disp_clk;      /* display clock in kHz */
8308         u32 src_width;     /* viewport width */
8309         u32 active_time;   /* active display time in ns */
8310         u32 blank_time;    /* blank time in ns */
8311         bool interlaced;    /* mode is interlaced */
8312         fixed20_12 vsc;    /* vertical scale ratio */
8313         u32 num_heads;     /* number of active crtcs */
8314         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8315         u32 lb_size;       /* line buffer allocated to pipe */
8316         u32 vtaps;         /* vertical scaler taps */
8317 };
8318
8319 /**
8320  * dce8_dram_bandwidth - get the dram bandwidth
8321  *
8322  * @wm: watermark calculation data
8323  *
8324  * Calculate the raw dram bandwidth (CIK).
8325  * Used for display watermark bandwidth calculations
8326  * Returns the dram bandwidth in MBytes/s
8327  */
8328 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8329 {
8330         /* Calculate raw DRAM Bandwidth */
8331         fixed20_12 dram_efficiency; /* 0.7 */
8332         fixed20_12 yclk, dram_channels, bandwidth;
8333         fixed20_12 a;
8334
8335         a.full = dfixed_const(1000);
8336         yclk.full = dfixed_const(wm->yclk);
8337         yclk.full = dfixed_div(yclk, a);
8338         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8339         a.full = dfixed_const(10);
8340         dram_efficiency.full = dfixed_const(7);
8341         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8342         bandwidth.full = dfixed_mul(dram_channels, yclk);
8343         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8344
8345         return dfixed_trunc(bandwidth);
8346 }
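
/* Worked example (illustrative only): with wm->yclk = 500000 (500 MHz per
 * data pin, expressed in kHz) and wm->dram_channels = 2, the fixed-point
 * math above reduces to
 *   (500000 / 1000) * (2 * 4) * 0.7 = 2800 MBytes/s
 * of raw DRAM bandwidth.
 */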
8347
8348 /**
8349  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8350  *
8351  * @wm: watermark calculation data
8352  *
8353  * Calculate the dram bandwidth used for display (CIK).
8354  * Used for display watermark bandwidth calculations
8355  * Returns the dram bandwidth for display in MBytes/s
8356  */
8357 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8358 {
8359         /* Calculate DRAM Bandwidth and the part allocated to display. */
8360         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8361         fixed20_12 yclk, dram_channels, bandwidth;
8362         fixed20_12 a;
8363
8364         a.full = dfixed_const(1000);
8365         yclk.full = dfixed_const(wm->yclk);
8366         yclk.full = dfixed_div(yclk, a);
8367         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8368         a.full = dfixed_const(10);
8369         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8370         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8371         bandwidth.full = dfixed_mul(dram_channels, yclk);
8372         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8373
8374         return dfixed_trunc(bandwidth);
8375 }
8376
8377 /**
8378  * dce8_data_return_bandwidth - get the data return bandwidth
8379  *
8380  * @wm: watermark calculation data
8381  *
8382  * Calculate the data return bandwidth used for display (CIK).
8383  * Used for display watermark bandwidth calculations
8384  * Returns the data return bandwidth in MBytes/s
8385  */
8386 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8387 {
8388         /* Calculate the display Data return Bandwidth */
8389         fixed20_12 return_efficiency; /* 0.8 */
8390         fixed20_12 sclk, bandwidth;
8391         fixed20_12 a;
8392
8393         a.full = dfixed_const(1000);
8394         sclk.full = dfixed_const(wm->sclk);
8395         sclk.full = dfixed_div(sclk, a);
8396         a.full = dfixed_const(10);
8397         return_efficiency.full = dfixed_const(8);
8398         return_efficiency.full = dfixed_div(return_efficiency, a);
8399         a.full = dfixed_const(32);
8400         bandwidth.full = dfixed_mul(a, sclk);
8401         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8402
8403         return dfixed_trunc(bandwidth);
8404 }
8405
8406 /**
8407  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8408  *
8409  * @wm: watermark calculation data
8410  *
8411  * Calculate the dmif bandwidth used for display (CIK).
8412  * Used for display watermark bandwidth calculations
8413  * Returns the dmif bandwidth in MBytes/s
8414  */
8415 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8416 {
8417         /* Calculate the DMIF Request Bandwidth */
8418         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8419         fixed20_12 disp_clk, bandwidth;
8420         fixed20_12 a, b;
8421
8422         a.full = dfixed_const(1000);
8423         disp_clk.full = dfixed_const(wm->disp_clk);
8424         disp_clk.full = dfixed_div(disp_clk, a);
8425         a.full = dfixed_const(32);
8426         b.full = dfixed_mul(a, disp_clk);
8427
8428         a.full = dfixed_const(10);
8429         disp_clk_request_efficiency.full = dfixed_const(8);
8430         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8431
8432         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8433
8434         return dfixed_trunc(bandwidth);
8435 }
8436
8437 /**
8438  * dce8_available_bandwidth - get the min available bandwidth
8439  *
8440  * @wm: watermark calculation data
8441  *
8442  * Calculate the min available bandwidth used for display (CIK).
8443  * Used for display watermark bandwidth calculations
8444  * Returns the min available bandwidth in MBytes/s
8445  */
8446 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8447 {
8448         /* Calculate the Available bandwidth. Display can use this temporarily, but not on average. */
8449         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8450         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8451         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8452
8453         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8454 }
8455
8456 /**
8457  * dce8_average_bandwidth - get the average available bandwidth
8458  *
8459  * @wm: watermark calculation data
8460  *
8461  * Calculate the average available bandwidth used for display (CIK).
8462  * Used for display watermark bandwidth calculations
8463  * Returns the average available bandwidth in MBytes/s
8464  */
8465 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8466 {
8467         /* Calculate the display mode Average Bandwidth
8468          * DisplayMode should contain the source and destination dimensions,
8469          * timing, etc.
8470          */
8471         fixed20_12 bpp;
8472         fixed20_12 line_time;
8473         fixed20_12 src_width;
8474         fixed20_12 bandwidth;
8475         fixed20_12 a;
8476
8477         a.full = dfixed_const(1000);
8478         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8479         line_time.full = dfixed_div(line_time, a);
8480         bpp.full = dfixed_const(wm->bytes_per_pixel);
8481         src_width.full = dfixed_const(wm->src_width);
8482         bandwidth.full = dfixed_mul(src_width, bpp);
8483         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8484         bandwidth.full = dfixed_div(bandwidth, line_time);
8485
8486         return dfixed_trunc(bandwidth);
8487 }
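
/* Worked example (illustrative only): a 1920-pixel-wide source at 4 bytes
 * per pixel, vsc = 1.0 and a 13200 ns line time gives
 *   1920 * 4 * 1.0 / 13.2 us = ~581 MBytes/s
 * of average bandwidth for that head.
 */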
8488
8489 /**
8490  * dce8_latency_watermark - get the latency watermark
8491  *
8492  * @wm: watermark calculation data
8493  *
8494  * Calculate the latency watermark (CIK).
8495  * Used for display watermark bandwidth calculations
8496  * Returns the latency watermark in ns
8497  */
8498 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8499 {
8500         /* First calculate the latency in ns */
8501         u32 mc_latency = 2000; /* 2000 ns. */
8502         u32 available_bandwidth = dce8_available_bandwidth(wm);
8503         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8504         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8505         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8506         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8507                 (wm->num_heads * cursor_line_pair_return_time);
8508         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8509         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8510         u32 tmp, dmif_size = 12288;
8511         fixed20_12 a, b, c;
8512
8513         if (wm->num_heads == 0)
8514                 return 0;
8515
8516         a.full = dfixed_const(2);
8517         b.full = dfixed_const(1);
8518         if ((wm->vsc.full > a.full) ||
8519             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8520             (wm->vtaps >= 5) ||
8521             ((wm->vsc.full >= a.full) && wm->interlaced))
8522                 max_src_lines_per_dst_line = 4;
8523         else
8524                 max_src_lines_per_dst_line = 2;
8525
8526         a.full = dfixed_const(available_bandwidth);
8527         b.full = dfixed_const(wm->num_heads);
8528         a.full = dfixed_div(a, b);
8529
8530         b.full = dfixed_const(mc_latency + 512);
8531         c.full = dfixed_const(wm->disp_clk);
8532         b.full = dfixed_div(b, c);
8533
8534         c.full = dfixed_const(dmif_size);
8535         b.full = dfixed_div(c, b);
8536
8537         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8538
8539         b.full = dfixed_const(1000);
8540         c.full = dfixed_const(wm->disp_clk);
8541         b.full = dfixed_div(c, b);
8542         c.full = dfixed_const(wm->bytes_per_pixel);
8543         b.full = dfixed_mul(b, c);
8544
8545         lb_fill_bw = min(tmp, dfixed_trunc(b));
8546
8547         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8548         b.full = dfixed_const(1000);
8549         c.full = dfixed_const(lb_fill_bw);
8550         b.full = dfixed_div(c, b);
8551         a.full = dfixed_div(a, b);
8552         line_fill_time = dfixed_trunc(a);
8553
8554         if (line_fill_time < wm->active_time)
8555                 return latency;
8556         else
8557                 return latency + (line_fill_time - wm->active_time);
8558
8559 }
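
/* The latency computed above is, in ns:
 *   latency = mc_latency (2000)
 *           + dc_latency (40000000 / disp_clk)
 *           + (num_heads + 1) * worst_chunk_return_time
 *           + num_heads * cursor_line_pair_return_time
 * plus the line-fill penalty when the line buffer cannot be refilled within
 * the active time.  Worked example (illustrative only): one head,
 * disp_clk = 148500 kHz, available bandwidth 2800 MBytes/s:
 *   worst_chunk_return_time      = 512 * 8 * 1000 / 2800 = 1462 ns
 *   cursor_line_pair_return_time = 128 * 4 * 1000 / 2800 = 182 ns
 *   dc_latency                   = 40000000 / 148500     = 269 ns
 *   latency                      = 2000 + 269 + 2 * 1462 + 1 * 182 = 5375 ns
 */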
8560
8561 /**
8562  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8563  * average bandwidth against the dram bandwidth for display
8564  *
8565  * @wm: watermark calculation data
8566  *
8567  * Check if the display average bandwidth fits in the display
8568  * dram bandwidth (CIK).
8569  * Used for display watermark bandwidth calculations
8570  * Returns true if the display fits, false if not.
8571  */
8572 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8573 {
8574         if (dce8_average_bandwidth(wm) <=
8575             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8576                 return true;
8577         else
8578                 return false;
8579 }
8580
8581 /**
8582  * dce8_average_bandwidth_vs_available_bandwidth - check
8583  * average and available bandwidth
8584  *
8585  * @wm: watermark calculation data
8586  *
8587  * Check if the display average bandwidth fits in the display
8588  * available bandwidth (CIK).
8589  * Used for display watermark bandwidth calculations
8590  * Returns true if the display fits, false if not.
8591  */
8592 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8593 {
8594         if (dce8_average_bandwidth(wm) <=
8595             (dce8_available_bandwidth(wm) / wm->num_heads))
8596                 return true;
8597         else
8598                 return false;
8599 }
8600
8601 /**
8602  * dce8_check_latency_hiding - check latency hiding
8603  *
8604  * @wm: watermark calculation data
8605  *
8606  * Check latency hiding (CIK).
8607  * Used for display watermark bandwidth calculations
8608  * Returns true if the display fits, false if not.
8609  */
8610 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8611 {
8612         u32 lb_partitions = wm->lb_size / wm->src_width;
8613         u32 line_time = wm->active_time + wm->blank_time;
8614         u32 latency_tolerant_lines;
8615         u32 latency_hiding;
8616         fixed20_12 a;
8617
8618         a.full = dfixed_const(1);
8619         if (wm->vsc.full > a.full)
8620                 latency_tolerant_lines = 1;
8621         else {
8622                 if (lb_partitions <= (wm->vtaps + 1))
8623                         latency_tolerant_lines = 1;
8624                 else
8625                         latency_tolerant_lines = 2;
8626         }
8627
8628         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8629
8630         if (dce8_latency_watermark(wm) <= latency_hiding)
8631                 return true;
8632         else
8633                 return false;
8634 }
8635
8636 /**
8637  * dce8_program_watermarks - program display watermarks
8638  *
8639  * @rdev: radeon_device pointer
8640  * @radeon_crtc: the selected display controller
8641  * @lb_size: line buffer size
8642  * @num_heads: number of display controllers in use
8643  *
8644  * Calculate and program the display watermarks for the
8645  * selected display controller (CIK).
8646  */
8647 static void dce8_program_watermarks(struct radeon_device *rdev,
8648                                     struct radeon_crtc *radeon_crtc,
8649                                     u32 lb_size, u32 num_heads)
8650 {
8651         struct drm_display_mode *mode = &radeon_crtc->base.mode;
8652         struct dce8_wm_params wm_low, wm_high;
8653         u32 pixel_period;
8654         u32 line_time = 0;
8655         u32 latency_watermark_a = 0, latency_watermark_b = 0;
8656         u32 tmp, wm_mask;
8657
8658         if (radeon_crtc->base.enabled && num_heads && mode) {
8659                 pixel_period = 1000000 / (u32)mode->clock;
8660                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8661
8662                 /* watermark for high clocks */
8663                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8664                     rdev->pm.dpm_enabled) {
8665                         wm_high.yclk =
8666                                 radeon_dpm_get_mclk(rdev, false) * 10;
8667                         wm_high.sclk =
8668                                 radeon_dpm_get_sclk(rdev, false) * 10;
8669                 } else {
8670                         wm_high.yclk = rdev->pm.current_mclk * 10;
8671                         wm_high.sclk = rdev->pm.current_sclk * 10;
8672                 }
8673
8674                 wm_high.disp_clk = mode->clock;
8675                 wm_high.src_width = mode->crtc_hdisplay;
8676                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8677                 wm_high.blank_time = line_time - wm_high.active_time;
8678                 wm_high.interlaced = false;
8679                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8680                         wm_high.interlaced = true;
8681                 wm_high.vsc = radeon_crtc->vsc;
8682                 wm_high.vtaps = 1;
8683                 if (radeon_crtc->rmx_type != RMX_OFF)
8684                         wm_high.vtaps = 2;
8685                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8686                 wm_high.lb_size = lb_size;
8687                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8688                 wm_high.num_heads = num_heads;
8689
8690                 /* set for high clocks */
8691                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8692
8693                 /* possibly force display priority to high */
8694                 /* should really do this at mode validation time... */
8695                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8696                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8697                     !dce8_check_latency_hiding(&wm_high) ||
8698                     (rdev->disp_priority == 2)) {
8699                         DRM_DEBUG_KMS("force priority to high\n");
8700                 }
8701
8702                 /* watermark for low clocks */
8703                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8704                     rdev->pm.dpm_enabled) {
8705                         wm_low.yclk =
8706                                 radeon_dpm_get_mclk(rdev, true) * 10;
8707                         wm_low.sclk =
8708                                 radeon_dpm_get_sclk(rdev, true) * 10;
8709                 } else {
8710                         wm_low.yclk = rdev->pm.current_mclk * 10;
8711                         wm_low.sclk = rdev->pm.current_sclk * 10;
8712                 }
8713
8714                 wm_low.disp_clk = mode->clock;
8715                 wm_low.src_width = mode->crtc_hdisplay;
8716                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8717                 wm_low.blank_time = line_time - wm_low.active_time;
8718                 wm_low.interlaced = false;
8719                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8720                         wm_low.interlaced = true;
8721                 wm_low.vsc = radeon_crtc->vsc;
8722                 wm_low.vtaps = 1;
8723                 if (radeon_crtc->rmx_type != RMX_OFF)
8724                         wm_low.vtaps = 2;
8725                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8726                 wm_low.lb_size = lb_size;
8727                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8728                 wm_low.num_heads = num_heads;
8729
8730                 /* set for low clocks */
8731                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8732
8733                 /* possibly force display priority to high */
8734                 /* should really do this at mode validation time... */
8735                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8736                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8737                     !dce8_check_latency_hiding(&wm_low) ||
8738                     (rdev->disp_priority == 2)) {
8739                         DRM_DEBUG_KMS("force priority to high\n");
8740                 }
8741         }
8742
8743         /* select wm A */
8744         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8745         tmp = wm_mask;
8746         tmp &= ~LATENCY_WATERMARK_MASK(3);
8747         tmp |= LATENCY_WATERMARK_MASK(1);
8748         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8749         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8750                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8751                 LATENCY_HIGH_WATERMARK(line_time)));
8752         /* select wm B */
8753         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8754         tmp &= ~LATENCY_WATERMARK_MASK(3);
8755         tmp |= LATENCY_WATERMARK_MASK(2);
8756         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8757         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8758                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8759                 LATENCY_HIGH_WATERMARK(line_time)));
8760         /* restore original selection */
8761         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8762
8763         /* save values for DPM */
8764         radeon_crtc->line_time = line_time;
8765         radeon_crtc->wm_high = latency_watermark_a;
8766         radeon_crtc->wm_low = latency_watermark_b;
8767 }
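
/* Example of the timing math above (illustrative only): for a 1080p mode
 * with mode->clock = 148500 kHz and crtc_htotal = 2200,
 *   pixel_period = 1000000 / 148500 = 6 ns (integer truncation)
 *   line_time    = min(2200 * 6, 65535) = 13200 ns
 * and latency_watermark_a/b are clamped to the same 65535 maximum before
 * being written into DPG_PIPE_LATENCY_CONTROL.
 */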
8768
8769 /**
8770  * dce8_bandwidth_update - program display watermarks
8771  *
8772  * @rdev: radeon_device pointer
8773  *
8774  * Calculate and program the display watermarks and line
8775  * buffer allocation (CIK).
8776  */
8777 void dce8_bandwidth_update(struct radeon_device *rdev)
8778 {
8779         struct drm_display_mode *mode = NULL;
8780         u32 num_heads = 0, lb_size;
8781         int i;
8782
8783         radeon_update_display_priority(rdev);
8784
8785         for (i = 0; i < rdev->num_crtc; i++) {
8786                 if (rdev->mode_info.crtcs[i]->base.enabled)
8787                         num_heads++;
8788         }
8789         for (i = 0; i < rdev->num_crtc; i++) {
8790                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8791                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8792                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8793         }
8794 }
8795
8796 /**
8797  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8798  *
8799  * @rdev: radeon_device pointer
8800  *
8801  * Fetches a GPU clock counter snapshot (CIK).
8802  * Returns the 64 bit clock counter snapshot.
8803  */
8804 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8805 {
8806         uint64_t clock;
8807
8808         mutex_lock(&rdev->gpu_clock_mutex);
8809         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8810         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8811                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8812         mutex_unlock(&rdev->gpu_clock_mutex);
8813         return clock;
8814 }
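
/* Hypothetical usage sketch (illustrative only, not part of the driver):
 * measuring an interval with the latched 64-bit counter.  The counter ticks
 * at the current GPU clock, so the delta is only meaningful while that
 * clock is stable.
 *
 *   uint64_t t0 = cik_get_gpu_clock_counter(rdev);
 *   ... workload ...
 *   uint64_t t1 = cik_get_gpu_clock_counter(rdev);
 *   elapsed_gpu_clocks = t1 - t0;
 */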
8815
8816 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8817                               u32 cntl_reg, u32 status_reg)
8818 {
8819         int r, i;
8820         struct atom_clock_dividers dividers;
8821         uint32_t tmp;
8822
8823         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8824                                            clock, false, &dividers);
8825         if (r)
8826                 return r;
8827
8828         tmp = RREG32_SMC(cntl_reg);
8829         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8830         tmp |= dividers.post_divider;
8831         WREG32_SMC(cntl_reg, tmp);
8832
8833         for (i = 0; i < 100; i++) {
8834                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8835                         break;
8836                 mdelay(10);
8837         }
8838         if (i == 100)
8839                 return -ETIMEDOUT;
8840
8841         return 0;
8842 }
8843
8844 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8845 {
8846         int r = 0;
8847
8848         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8849         if (r)
8850                 return r;
8851
8852         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8853         return r;
8854 }
8855
8856 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8857 {
8858         struct pci_dev *root = rdev->pdev->bus->self;
8859         int bridge_pos, gpu_pos;
8860         u32 speed_cntl, mask, current_data_rate;
8861         int ret, i;
8862         u16 tmp16;
8863
8864         if (radeon_pcie_gen2 == 0)
8865                 return;
8866
8867         if (rdev->flags & RADEON_IS_IGP)
8868                 return;
8869
8870         if (!(rdev->flags & RADEON_IS_PCIE))
8871                 return;
8872
8873         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8874         if (ret != 0)
8875                 return;
8876
8877         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8878                 return;
8879
8880         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8881         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8882                 LC_CURRENT_DATA_RATE_SHIFT;
8883         if (mask & DRM_PCIE_SPEED_80) {
8884                 if (current_data_rate == 2) {
8885                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8886                         return;
8887                 }
8888                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8889         } else if (mask & DRM_PCIE_SPEED_50) {
8890                 if (current_data_rate == 1) {
8891                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8892                         return;
8893                 }
8894                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8895         }
8896
8897         bridge_pos = pci_pcie_cap(root);
8898         if (!bridge_pos)
8899                 return;
8900
8901         gpu_pos = pci_pcie_cap(rdev->pdev);
8902         if (!gpu_pos)
8903                 return;
8904
8905         if (mask & DRM_PCIE_SPEED_80) {
8906                 /* re-try equalization if gen3 is not already enabled */
8907                 if (current_data_rate != 2) {
8908                         u16 bridge_cfg, gpu_cfg;
8909                         u16 bridge_cfg2, gpu_cfg2;
8910                         u32 max_lw, current_lw, tmp;
8911
8912                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8913                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8914
8915                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8916                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8917
8918                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8919                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8920
8921                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8922                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8923                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8924
8925                         if (current_lw < max_lw) {
8926                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8927                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8928                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8929                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8930                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8931                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8932                                 }
8933                         }
8934
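                        /* retry link equalization up to 10 times; each pass quiesces the link,
                         * redoes EQ, then restores the saved bridge/GPU link control settings
                         */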
8935                         for (i = 0; i < 10; i++) {
8936                                 /* check status */
8937                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8938                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8939                                         break;
8940
8941                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8942                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8943
8944                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8945                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8946
8947                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8948                                 tmp |= LC_SET_QUIESCE;
8949                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8950
8951                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8952                                 tmp |= LC_REDO_EQ;
8953                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8954
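                                /* let the redone equalization settle before restoring the link control state */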
8955                                 mdelay(100);
8956
8957                                 /* linkctl */
8958                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8959                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8960                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8961                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8962
8963                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8964                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8965                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8966                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8967
8968                                 /* linkctl2 */
8969                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8970                                 tmp16 &= ~((1 << 4) | (7 << 9));
8971                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8972                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8973
8974                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8975                                 tmp16 &= ~((1 << 4) | (7 << 9));
8976                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8977                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8978
8979                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8980                                 tmp &= ~LC_SET_QUIESCE;
8981                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8982                         }
8983                 }
8984         }
8985
8986         /* set the link speed */
8987         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8988         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8989         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8990
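        /* program the target link speed field (LNKCTL2[3:0]) on the GPU side */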
8991         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8992         tmp16 &= ~0xf;
8993         if (mask & DRM_PCIE_SPEED_80)
8994                 tmp16 |= 3; /* gen3 */
8995         else if (mask & DRM_PCIE_SPEED_50)
8996                 tmp16 |= 2; /* gen2 */
8997         else
8998                 tmp16 |= 1; /* gen1 */
8999         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9000
9001         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9002         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9003         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9004
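        /* wait for the hardware to acknowledge the speed change request */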
9005         for (i = 0; i < rdev->usec_timeout; i++) {
9006                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9007                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9008                         break;
9009                 udelay(1);
9010         }
9011 }
9012
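/**
 * cik_program_aspm - set up PCIE ASPM
 *
 * @rdev: radeon_device pointer
 *
 * Programs the L0s/L1 inactivity timers, allows the PCIE PLLs to power
 * down in L1 and, when the upstream bridge supports clock power
 * management, switches the misc clock sources so the reference clock
 * can be gated (CIK).
 */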
9013 static void cik_program_aspm(struct radeon_device *rdev)
9014 {
9015         u32 data, orig;
9016         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9017         bool disable_clkreq = false;
9018
9019         if (radeon_aspm == 0)
9020                 return;
9021
9022         /* XXX double check IGPs */
9023         if (rdev->flags & RADEON_IS_IGP)
9024                 return;
9025
9026         if (!(rdev->flags & RADEON_IS_PCIE))
9027                 return;
9028
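        /* override the number of fast training sequences (N_FTS) advertised for L0s exit */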
9029         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9030         data &= ~LC_XMIT_N_FTS_MASK;
9031         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9032         if (orig != data)
9033                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9034
9035         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9036         data |= LC_GO_TO_RECOVERY;
9037         if (orig != data)
9038                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9039
9040         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9041         data |= P_IGNORE_EDB_ERR;
9042         if (orig != data)
9043                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9044
9045         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9046         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9047         data |= LC_PMI_TO_L1_DIS;
9048         if (!disable_l0s)
9049                 data |= LC_L0S_INACTIVITY(7);
9050
9051         if (!disable_l1) {
9052                 data |= LC_L1_INACTIVITY(7);
9053                 data &= ~LC_PMI_TO_L1_DIS;
9054                 if (orig != data)
9055                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9056
9057                 if (!disable_plloff_in_l1) {
9058                         bool clk_req_support;
9059
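                        /* allow the PIF PLLs to power down while the link is in low-power states */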
9060                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9061                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9062                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9063                         if (orig != data)
9064                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9065
9066                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9067                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9068                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9069                         if (orig != data)
9070                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9071
9072                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9073                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9074                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9075                         if (orig != data)
9076                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9077
9078                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9079                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9080                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9081                         if (orig != data)
9082                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9083
9084                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9085                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9086                         data |= LC_DYN_LANES_PWR_STATE(3);
9087                         if (orig != data)
9088                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9089
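                        /* only enable CLKREQ#-based power down if the upstream bridge supports clock power management */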
9090                         if (!disable_clkreq) {
9091                                 struct pci_dev *root = rdev->pdev->bus->self;
9092                                 u32 lnkcap;
9093
9094                                 clk_req_support = false;
9095                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9096                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9097                                         clk_req_support = true;
9098                         } else {
9099                                 clk_req_support = false;
9100                         }
9101
9102                         if (clk_req_support) {
9103                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9104                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9105                                 if (orig != data)
9106                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9107
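                                /* switch the thermal monitor, deep-sleep and zclk clock sources and
                                 * stop forcing the BIF refclk so the PCIE reference clock can be gated
                                 */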
9108                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9109                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9110                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9111                                 if (orig != data)
9112                                         WREG32_SMC(THM_CLK_CNTL, data);
9113
9114                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9115                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9116                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9117                                 if (orig != data)
9118                                         WREG32_SMC(MISC_CLK_CTRL, data);
9119
9120                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9121                                 data &= ~BCLK_AS_XCLK;
9122                                 if (orig != data)
9123                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9124
9125                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9126                                 data &= ~FORCE_BIF_REFCLK_EN;
9127                                 if (orig != data)
9128                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9129
9130                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9131                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9132                                 data |= MPLL_CLKOUT_SEL(4);
9133                                 if (orig != data)
9134                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9135                         }
9136                 }
9137         } else {
9138                 if (orig != data)
9139                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9140         }
9141
9142         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9143         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9144         if (orig != data)
9145                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9146
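        /* disable L0s again on lane-reversed links that report the maximum N_FTS value */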
9147         if (!disable_l0s) {
9148                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9149         if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9150                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9151                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9152                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9153                                 data &= ~LC_L0S_INACTIVITY_MASK;
9154                                 if (orig != data)
9155                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9156                         }
9157                 }
9158         }
9159 }