drivers/gpu/drm/radeon/cik.c
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
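
/*
 * Note (not part of this excerpt): MODULE_FIRMWARE() only records the
 * firmware file names in the module info; the driver's microcode init
 * path, not shown here, loads the actual images at runtime via
 * request_firmware().
 */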

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
                                          bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
        u32 temp;
        int actual_temp = 0;

        temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
                CTF_TEMP_SHIFT;

        if (temp & 0x200)
                actual_temp = 255;
        else
                actual_temp = temp & 0x1ff;

        actual_temp = actual_temp * 1000;

        return actual_temp;
}
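
/*
 * Worked example of the decode above (illustrative numbers only): a raw
 * CTF_TEMP field of 64 (0x40) yields 64 * 1000 = 64000 millidegrees C;
 * any reading with bit 9 set is reported as the saturated value 255000.
 */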

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
        u32 temp;
        int actual_temp = 0;

        temp = RREG32_SMC(0xC0300E0C);

        if (temp)
                actual_temp = (temp / 8) - 49;
        else
                actual_temp = 0;

        actual_temp = actual_temp * 1000;

        return actual_temp;
}
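
/*
 * Worked example (illustrative numbers only): a raw SMC reading of 1000
 * decodes to (1000 / 8) - 49 = 76, i.e. 76000 millidegrees C; a reading
 * of 0 is reported as 0.
 */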

/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
        unsigned long flags;
        u32 r;

        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        r = RREG32(PCIE_DATA);
        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
        return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
        unsigned long flags;

        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        WREG32(PCIE_DATA, v);
        (void)RREG32(PCIE_DATA);
        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
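
/*
 * Illustrative use of the accessors above: the PCIE port register file is
 * reached by writing the register offset to PCIE_INDEX and moving the data
 * through PCIE_DATA under pciep_idx_lock, so a read-modify-write looks
 * roughly like this (SOME_PCIE_PORT_REG and SOME_BIT are placeholders,
 * not real cikd.h defines):
 *
 *	u32 tmp = cik_pciep_rreg(rdev, SOME_PCIE_PORT_REG);
 *	tmp |= SOME_BIT;
 *	cik_pciep_wreg(rdev, SOME_PCIE_PORT_REG, tmp);
 */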
154
155 static const u32 spectre_rlc_save_restore_register_list[] =
156 {
157         (0x0e00 << 16) | (0xc12c >> 2),
158         0x00000000,
159         (0x0e00 << 16) | (0xc140 >> 2),
160         0x00000000,
161         (0x0e00 << 16) | (0xc150 >> 2),
162         0x00000000,
163         (0x0e00 << 16) | (0xc15c >> 2),
164         0x00000000,
165         (0x0e00 << 16) | (0xc168 >> 2),
166         0x00000000,
167         (0x0e00 << 16) | (0xc170 >> 2),
168         0x00000000,
169         (0x0e00 << 16) | (0xc178 >> 2),
170         0x00000000,
171         (0x0e00 << 16) | (0xc204 >> 2),
172         0x00000000,
173         (0x0e00 << 16) | (0xc2b4 >> 2),
174         0x00000000,
175         (0x0e00 << 16) | (0xc2b8 >> 2),
176         0x00000000,
177         (0x0e00 << 16) | (0xc2bc >> 2),
178         0x00000000,
179         (0x0e00 << 16) | (0xc2c0 >> 2),
180         0x00000000,
181         (0x0e00 << 16) | (0x8228 >> 2),
182         0x00000000,
183         (0x0e00 << 16) | (0x829c >> 2),
184         0x00000000,
185         (0x0e00 << 16) | (0x869c >> 2),
186         0x00000000,
187         (0x0600 << 16) | (0x98f4 >> 2),
188         0x00000000,
189         (0x0e00 << 16) | (0x98f8 >> 2),
190         0x00000000,
191         (0x0e00 << 16) | (0x9900 >> 2),
192         0x00000000,
193         (0x0e00 << 16) | (0xc260 >> 2),
194         0x00000000,
195         (0x0e00 << 16) | (0x90e8 >> 2),
196         0x00000000,
197         (0x0e00 << 16) | (0x3c000 >> 2),
198         0x00000000,
199         (0x0e00 << 16) | (0x3c00c >> 2),
200         0x00000000,
201         (0x0e00 << 16) | (0x8c1c >> 2),
202         0x00000000,
203         (0x0e00 << 16) | (0x9700 >> 2),
204         0x00000000,
205         (0x0e00 << 16) | (0xcd20 >> 2),
206         0x00000000,
207         (0x4e00 << 16) | (0xcd20 >> 2),
208         0x00000000,
209         (0x5e00 << 16) | (0xcd20 >> 2),
210         0x00000000,
211         (0x6e00 << 16) | (0xcd20 >> 2),
212         0x00000000,
213         (0x7e00 << 16) | (0xcd20 >> 2),
214         0x00000000,
215         (0x8e00 << 16) | (0xcd20 >> 2),
216         0x00000000,
217         (0x9e00 << 16) | (0xcd20 >> 2),
218         0x00000000,
219         (0xae00 << 16) | (0xcd20 >> 2),
220         0x00000000,
221         (0xbe00 << 16) | (0xcd20 >> 2),
222         0x00000000,
223         (0x0e00 << 16) | (0x89bc >> 2),
224         0x00000000,
225         (0x0e00 << 16) | (0x8900 >> 2),
226         0x00000000,
227         0x3,
228         (0x0e00 << 16) | (0xc130 >> 2),
229         0x00000000,
230         (0x0e00 << 16) | (0xc134 >> 2),
231         0x00000000,
232         (0x0e00 << 16) | (0xc1fc >> 2),
233         0x00000000,
234         (0x0e00 << 16) | (0xc208 >> 2),
235         0x00000000,
236         (0x0e00 << 16) | (0xc264 >> 2),
237         0x00000000,
238         (0x0e00 << 16) | (0xc268 >> 2),
239         0x00000000,
240         (0x0e00 << 16) | (0xc26c >> 2),
241         0x00000000,
242         (0x0e00 << 16) | (0xc270 >> 2),
243         0x00000000,
244         (0x0e00 << 16) | (0xc274 >> 2),
245         0x00000000,
246         (0x0e00 << 16) | (0xc278 >> 2),
247         0x00000000,
248         (0x0e00 << 16) | (0xc27c >> 2),
249         0x00000000,
250         (0x0e00 << 16) | (0xc280 >> 2),
251         0x00000000,
252         (0x0e00 << 16) | (0xc284 >> 2),
253         0x00000000,
254         (0x0e00 << 16) | (0xc288 >> 2),
255         0x00000000,
256         (0x0e00 << 16) | (0xc28c >> 2),
257         0x00000000,
258         (0x0e00 << 16) | (0xc290 >> 2),
259         0x00000000,
260         (0x0e00 << 16) | (0xc294 >> 2),
261         0x00000000,
262         (0x0e00 << 16) | (0xc298 >> 2),
263         0x00000000,
264         (0x0e00 << 16) | (0xc29c >> 2),
265         0x00000000,
266         (0x0e00 << 16) | (0xc2a0 >> 2),
267         0x00000000,
268         (0x0e00 << 16) | (0xc2a4 >> 2),
269         0x00000000,
270         (0x0e00 << 16) | (0xc2a8 >> 2),
271         0x00000000,
272         (0x0e00 << 16) | (0xc2ac  >> 2),
273         0x00000000,
274         (0x0e00 << 16) | (0xc2b0 >> 2),
275         0x00000000,
276         (0x0e00 << 16) | (0x301d0 >> 2),
277         0x00000000,
278         (0x0e00 << 16) | (0x30238 >> 2),
279         0x00000000,
280         (0x0e00 << 16) | (0x30250 >> 2),
281         0x00000000,
282         (0x0e00 << 16) | (0x30254 >> 2),
283         0x00000000,
284         (0x0e00 << 16) | (0x30258 >> 2),
285         0x00000000,
286         (0x0e00 << 16) | (0x3025c >> 2),
287         0x00000000,
288         (0x4e00 << 16) | (0xc900 >> 2),
289         0x00000000,
290         (0x5e00 << 16) | (0xc900 >> 2),
291         0x00000000,
292         (0x6e00 << 16) | (0xc900 >> 2),
293         0x00000000,
294         (0x7e00 << 16) | (0xc900 >> 2),
295         0x00000000,
296         (0x8e00 << 16) | (0xc900 >> 2),
297         0x00000000,
298         (0x9e00 << 16) | (0xc900 >> 2),
299         0x00000000,
300         (0xae00 << 16) | (0xc900 >> 2),
301         0x00000000,
302         (0xbe00 << 16) | (0xc900 >> 2),
303         0x00000000,
304         (0x4e00 << 16) | (0xc904 >> 2),
305         0x00000000,
306         (0x5e00 << 16) | (0xc904 >> 2),
307         0x00000000,
308         (0x6e00 << 16) | (0xc904 >> 2),
309         0x00000000,
310         (0x7e00 << 16) | (0xc904 >> 2),
311         0x00000000,
312         (0x8e00 << 16) | (0xc904 >> 2),
313         0x00000000,
314         (0x9e00 << 16) | (0xc904 >> 2),
315         0x00000000,
316         (0xae00 << 16) | (0xc904 >> 2),
317         0x00000000,
318         (0xbe00 << 16) | (0xc904 >> 2),
319         0x00000000,
320         (0x4e00 << 16) | (0xc908 >> 2),
321         0x00000000,
322         (0x5e00 << 16) | (0xc908 >> 2),
323         0x00000000,
324         (0x6e00 << 16) | (0xc908 >> 2),
325         0x00000000,
326         (0x7e00 << 16) | (0xc908 >> 2),
327         0x00000000,
328         (0x8e00 << 16) | (0xc908 >> 2),
329         0x00000000,
330         (0x9e00 << 16) | (0xc908 >> 2),
331         0x00000000,
332         (0xae00 << 16) | (0xc908 >> 2),
333         0x00000000,
334         (0xbe00 << 16) | (0xc908 >> 2),
335         0x00000000,
336         (0x4e00 << 16) | (0xc90c >> 2),
337         0x00000000,
338         (0x5e00 << 16) | (0xc90c >> 2),
339         0x00000000,
340         (0x6e00 << 16) | (0xc90c >> 2),
341         0x00000000,
342         (0x7e00 << 16) | (0xc90c >> 2),
343         0x00000000,
344         (0x8e00 << 16) | (0xc90c >> 2),
345         0x00000000,
346         (0x9e00 << 16) | (0xc90c >> 2),
347         0x00000000,
348         (0xae00 << 16) | (0xc90c >> 2),
349         0x00000000,
350         (0xbe00 << 16) | (0xc90c >> 2),
351         0x00000000,
352         (0x4e00 << 16) | (0xc910 >> 2),
353         0x00000000,
354         (0x5e00 << 16) | (0xc910 >> 2),
355         0x00000000,
356         (0x6e00 << 16) | (0xc910 >> 2),
357         0x00000000,
358         (0x7e00 << 16) | (0xc910 >> 2),
359         0x00000000,
360         (0x8e00 << 16) | (0xc910 >> 2),
361         0x00000000,
362         (0x9e00 << 16) | (0xc910 >> 2),
363         0x00000000,
364         (0xae00 << 16) | (0xc910 >> 2),
365         0x00000000,
366         (0xbe00 << 16) | (0xc910 >> 2),
367         0x00000000,
368         (0x0e00 << 16) | (0xc99c >> 2),
369         0x00000000,
370         (0x0e00 << 16) | (0x9834 >> 2),
371         0x00000000,
372         (0x0000 << 16) | (0x30f00 >> 2),
373         0x00000000,
374         (0x0001 << 16) | (0x30f00 >> 2),
375         0x00000000,
376         (0x0000 << 16) | (0x30f04 >> 2),
377         0x00000000,
378         (0x0001 << 16) | (0x30f04 >> 2),
379         0x00000000,
380         (0x0000 << 16) | (0x30f08 >> 2),
381         0x00000000,
382         (0x0001 << 16) | (0x30f08 >> 2),
383         0x00000000,
384         (0x0000 << 16) | (0x30f0c >> 2),
385         0x00000000,
386         (0x0001 << 16) | (0x30f0c >> 2),
387         0x00000000,
388         (0x0600 << 16) | (0x9b7c >> 2),
389         0x00000000,
390         (0x0e00 << 16) | (0x8a14 >> 2),
391         0x00000000,
392         (0x0e00 << 16) | (0x8a18 >> 2),
393         0x00000000,
394         (0x0600 << 16) | (0x30a00 >> 2),
395         0x00000000,
396         (0x0e00 << 16) | (0x8bf0 >> 2),
397         0x00000000,
398         (0x0e00 << 16) | (0x8bcc >> 2),
399         0x00000000,
400         (0x0e00 << 16) | (0x8b24 >> 2),
401         0x00000000,
402         (0x0e00 << 16) | (0x30a04 >> 2),
403         0x00000000,
404         (0x0600 << 16) | (0x30a10 >> 2),
405         0x00000000,
406         (0x0600 << 16) | (0x30a14 >> 2),
407         0x00000000,
408         (0x0600 << 16) | (0x30a18 >> 2),
409         0x00000000,
410         (0x0600 << 16) | (0x30a2c >> 2),
411         0x00000000,
412         (0x0e00 << 16) | (0xc700 >> 2),
413         0x00000000,
414         (0x0e00 << 16) | (0xc704 >> 2),
415         0x00000000,
416         (0x0e00 << 16) | (0xc708 >> 2),
417         0x00000000,
418         (0x0e00 << 16) | (0xc768 >> 2),
419         0x00000000,
420         (0x0400 << 16) | (0xc770 >> 2),
421         0x00000000,
422         (0x0400 << 16) | (0xc774 >> 2),
423         0x00000000,
424         (0x0400 << 16) | (0xc778 >> 2),
425         0x00000000,
426         (0x0400 << 16) | (0xc77c >> 2),
427         0x00000000,
428         (0x0400 << 16) | (0xc780 >> 2),
429         0x00000000,
430         (0x0400 << 16) | (0xc784 >> 2),
431         0x00000000,
432         (0x0400 << 16) | (0xc788 >> 2),
433         0x00000000,
434         (0x0400 << 16) | (0xc78c >> 2),
435         0x00000000,
436         (0x0400 << 16) | (0xc798 >> 2),
437         0x00000000,
438         (0x0400 << 16) | (0xc79c >> 2),
439         0x00000000,
440         (0x0400 << 16) | (0xc7a0 >> 2),
441         0x00000000,
442         (0x0400 << 16) | (0xc7a4 >> 2),
443         0x00000000,
444         (0x0400 << 16) | (0xc7a8 >> 2),
445         0x00000000,
446         (0x0400 << 16) | (0xc7ac >> 2),
447         0x00000000,
448         (0x0400 << 16) | (0xc7b0 >> 2),
449         0x00000000,
450         (0x0400 << 16) | (0xc7b4 >> 2),
451         0x00000000,
452         (0x0e00 << 16) | (0x9100 >> 2),
453         0x00000000,
454         (0x0e00 << 16) | (0x3c010 >> 2),
455         0x00000000,
456         (0x0e00 << 16) | (0x92a8 >> 2),
457         0x00000000,
458         (0x0e00 << 16) | (0x92ac >> 2),
459         0x00000000,
460         (0x0e00 << 16) | (0x92b4 >> 2),
461         0x00000000,
462         (0x0e00 << 16) | (0x92b8 >> 2),
463         0x00000000,
464         (0x0e00 << 16) | (0x92bc >> 2),
465         0x00000000,
466         (0x0e00 << 16) | (0x92c0 >> 2),
467         0x00000000,
468         (0x0e00 << 16) | (0x92c4 >> 2),
469         0x00000000,
470         (0x0e00 << 16) | (0x92c8 >> 2),
471         0x00000000,
472         (0x0e00 << 16) | (0x92cc >> 2),
473         0x00000000,
474         (0x0e00 << 16) | (0x92d0 >> 2),
475         0x00000000,
476         (0x0e00 << 16) | (0x8c00 >> 2),
477         0x00000000,
478         (0x0e00 << 16) | (0x8c04 >> 2),
479         0x00000000,
480         (0x0e00 << 16) | (0x8c20 >> 2),
481         0x00000000,
482         (0x0e00 << 16) | (0x8c38 >> 2),
483         0x00000000,
484         (0x0e00 << 16) | (0x8c3c >> 2),
485         0x00000000,
486         (0x0e00 << 16) | (0xae00 >> 2),
487         0x00000000,
488         (0x0e00 << 16) | (0x9604 >> 2),
489         0x00000000,
490         (0x0e00 << 16) | (0xac08 >> 2),
491         0x00000000,
492         (0x0e00 << 16) | (0xac0c >> 2),
493         0x00000000,
494         (0x0e00 << 16) | (0xac10 >> 2),
495         0x00000000,
496         (0x0e00 << 16) | (0xac14 >> 2),
497         0x00000000,
498         (0x0e00 << 16) | (0xac58 >> 2),
499         0x00000000,
500         (0x0e00 << 16) | (0xac68 >> 2),
501         0x00000000,
502         (0x0e00 << 16) | (0xac6c >> 2),
503         0x00000000,
504         (0x0e00 << 16) | (0xac70 >> 2),
505         0x00000000,
506         (0x0e00 << 16) | (0xac74 >> 2),
507         0x00000000,
508         (0x0e00 << 16) | (0xac78 >> 2),
509         0x00000000,
510         (0x0e00 << 16) | (0xac7c >> 2),
511         0x00000000,
512         (0x0e00 << 16) | (0xac80 >> 2),
513         0x00000000,
514         (0x0e00 << 16) | (0xac84 >> 2),
515         0x00000000,
516         (0x0e00 << 16) | (0xac88 >> 2),
517         0x00000000,
518         (0x0e00 << 16) | (0xac8c >> 2),
519         0x00000000,
520         (0x0e00 << 16) | (0x970c >> 2),
521         0x00000000,
522         (0x0e00 << 16) | (0x9714 >> 2),
523         0x00000000,
524         (0x0e00 << 16) | (0x9718 >> 2),
525         0x00000000,
526         (0x0e00 << 16) | (0x971c >> 2),
527         0x00000000,
528         (0x0e00 << 16) | (0x31068 >> 2),
529         0x00000000,
530         (0x4e00 << 16) | (0x31068 >> 2),
531         0x00000000,
532         (0x5e00 << 16) | (0x31068 >> 2),
533         0x00000000,
534         (0x6e00 << 16) | (0x31068 >> 2),
535         0x00000000,
536         (0x7e00 << 16) | (0x31068 >> 2),
537         0x00000000,
538         (0x8e00 << 16) | (0x31068 >> 2),
539         0x00000000,
540         (0x9e00 << 16) | (0x31068 >> 2),
541         0x00000000,
542         (0xae00 << 16) | (0x31068 >> 2),
543         0x00000000,
544         (0xbe00 << 16) | (0x31068 >> 2),
545         0x00000000,
546         (0x0e00 << 16) | (0xcd10 >> 2),
547         0x00000000,
548         (0x0e00 << 16) | (0xcd14 >> 2),
549         0x00000000,
550         (0x0e00 << 16) | (0x88b0 >> 2),
551         0x00000000,
552         (0x0e00 << 16) | (0x88b4 >> 2),
553         0x00000000,
554         (0x0e00 << 16) | (0x88b8 >> 2),
555         0x00000000,
556         (0x0e00 << 16) | (0x88bc >> 2),
557         0x00000000,
558         (0x0400 << 16) | (0x89c0 >> 2),
559         0x00000000,
560         (0x0e00 << 16) | (0x88c4 >> 2),
561         0x00000000,
562         (0x0e00 << 16) | (0x88c8 >> 2),
563         0x00000000,
564         (0x0e00 << 16) | (0x88d0 >> 2),
565         0x00000000,
566         (0x0e00 << 16) | (0x88d4 >> 2),
567         0x00000000,
568         (0x0e00 << 16) | (0x88d8 >> 2),
569         0x00000000,
570         (0x0e00 << 16) | (0x8980 >> 2),
571         0x00000000,
572         (0x0e00 << 16) | (0x30938 >> 2),
573         0x00000000,
574         (0x0e00 << 16) | (0x3093c >> 2),
575         0x00000000,
576         (0x0e00 << 16) | (0x30940 >> 2),
577         0x00000000,
578         (0x0e00 << 16) | (0x89a0 >> 2),
579         0x00000000,
580         (0x0e00 << 16) | (0x30900 >> 2),
581         0x00000000,
582         (0x0e00 << 16) | (0x30904 >> 2),
583         0x00000000,
584         (0x0e00 << 16) | (0x89b4 >> 2),
585         0x00000000,
586         (0x0e00 << 16) | (0x3c210 >> 2),
587         0x00000000,
588         (0x0e00 << 16) | (0x3c214 >> 2),
589         0x00000000,
590         (0x0e00 << 16) | (0x3c218 >> 2),
591         0x00000000,
592         (0x0e00 << 16) | (0x8904 >> 2),
593         0x00000000,
594         0x5,
595         (0x0e00 << 16) | (0x8c28 >> 2),
596         (0x0e00 << 16) | (0x8c2c >> 2),
597         (0x0e00 << 16) | (0x8c30 >> 2),
598         (0x0e00 << 16) | (0x8c34 >> 2),
599         (0x0e00 << 16) | (0x9600 >> 2),
600 };
601
602 static const u32 kalindi_rlc_save_restore_register_list[] =
603 {
604         (0x0e00 << 16) | (0xc12c >> 2),
605         0x00000000,
606         (0x0e00 << 16) | (0xc140 >> 2),
607         0x00000000,
608         (0x0e00 << 16) | (0xc150 >> 2),
609         0x00000000,
610         (0x0e00 << 16) | (0xc15c >> 2),
611         0x00000000,
612         (0x0e00 << 16) | (0xc168 >> 2),
613         0x00000000,
614         (0x0e00 << 16) | (0xc170 >> 2),
615         0x00000000,
616         (0x0e00 << 16) | (0xc204 >> 2),
617         0x00000000,
618         (0x0e00 << 16) | (0xc2b4 >> 2),
619         0x00000000,
620         (0x0e00 << 16) | (0xc2b8 >> 2),
621         0x00000000,
622         (0x0e00 << 16) | (0xc2bc >> 2),
623         0x00000000,
624         (0x0e00 << 16) | (0xc2c0 >> 2),
625         0x00000000,
626         (0x0e00 << 16) | (0x8228 >> 2),
627         0x00000000,
628         (0x0e00 << 16) | (0x829c >> 2),
629         0x00000000,
630         (0x0e00 << 16) | (0x869c >> 2),
631         0x00000000,
632         (0x0600 << 16) | (0x98f4 >> 2),
633         0x00000000,
634         (0x0e00 << 16) | (0x98f8 >> 2),
635         0x00000000,
636         (0x0e00 << 16) | (0x9900 >> 2),
637         0x00000000,
638         (0x0e00 << 16) | (0xc260 >> 2),
639         0x00000000,
640         (0x0e00 << 16) | (0x90e8 >> 2),
641         0x00000000,
642         (0x0e00 << 16) | (0x3c000 >> 2),
643         0x00000000,
644         (0x0e00 << 16) | (0x3c00c >> 2),
645         0x00000000,
646         (0x0e00 << 16) | (0x8c1c >> 2),
647         0x00000000,
648         (0x0e00 << 16) | (0x9700 >> 2),
649         0x00000000,
650         (0x0e00 << 16) | (0xcd20 >> 2),
651         0x00000000,
652         (0x4e00 << 16) | (0xcd20 >> 2),
653         0x00000000,
654         (0x5e00 << 16) | (0xcd20 >> 2),
655         0x00000000,
656         (0x6e00 << 16) | (0xcd20 >> 2),
657         0x00000000,
658         (0x7e00 << 16) | (0xcd20 >> 2),
659         0x00000000,
660         (0x0e00 << 16) | (0x89bc >> 2),
661         0x00000000,
662         (0x0e00 << 16) | (0x8900 >> 2),
663         0x00000000,
664         0x3,
665         (0x0e00 << 16) | (0xc130 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0xc134 >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0xc1fc >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0xc208 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0xc264 >> 2),
674         0x00000000,
675         (0x0e00 << 16) | (0xc268 >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0xc26c >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0xc270 >> 2),
680         0x00000000,
681         (0x0e00 << 16) | (0xc274 >> 2),
682         0x00000000,
683         (0x0e00 << 16) | (0xc28c >> 2),
684         0x00000000,
685         (0x0e00 << 16) | (0xc290 >> 2),
686         0x00000000,
687         (0x0e00 << 16) | (0xc294 >> 2),
688         0x00000000,
689         (0x0e00 << 16) | (0xc298 >> 2),
690         0x00000000,
691         (0x0e00 << 16) | (0xc2a0 >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0xc2a4 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0xc2a8 >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0xc2ac >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0x301d0 >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0x30238 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0x30250 >> 2),
704         0x00000000,
705         (0x0e00 << 16) | (0x30254 >> 2),
706         0x00000000,
707         (0x0e00 << 16) | (0x30258 >> 2),
708         0x00000000,
709         (0x0e00 << 16) | (0x3025c >> 2),
710         0x00000000,
711         (0x4e00 << 16) | (0xc900 >> 2),
712         0x00000000,
713         (0x5e00 << 16) | (0xc900 >> 2),
714         0x00000000,
715         (0x6e00 << 16) | (0xc900 >> 2),
716         0x00000000,
717         (0x7e00 << 16) | (0xc900 >> 2),
718         0x00000000,
719         (0x4e00 << 16) | (0xc904 >> 2),
720         0x00000000,
721         (0x5e00 << 16) | (0xc904 >> 2),
722         0x00000000,
723         (0x6e00 << 16) | (0xc904 >> 2),
724         0x00000000,
725         (0x7e00 << 16) | (0xc904 >> 2),
726         0x00000000,
727         (0x4e00 << 16) | (0xc908 >> 2),
728         0x00000000,
729         (0x5e00 << 16) | (0xc908 >> 2),
730         0x00000000,
731         (0x6e00 << 16) | (0xc908 >> 2),
732         0x00000000,
733         (0x7e00 << 16) | (0xc908 >> 2),
734         0x00000000,
735         (0x4e00 << 16) | (0xc90c >> 2),
736         0x00000000,
737         (0x5e00 << 16) | (0xc90c >> 2),
738         0x00000000,
739         (0x6e00 << 16) | (0xc90c >> 2),
740         0x00000000,
741         (0x7e00 << 16) | (0xc90c >> 2),
742         0x00000000,
743         (0x4e00 << 16) | (0xc910 >> 2),
744         0x00000000,
745         (0x5e00 << 16) | (0xc910 >> 2),
746         0x00000000,
747         (0x6e00 << 16) | (0xc910 >> 2),
748         0x00000000,
749         (0x7e00 << 16) | (0xc910 >> 2),
750         0x00000000,
751         (0x0e00 << 16) | (0xc99c >> 2),
752         0x00000000,
753         (0x0e00 << 16) | (0x9834 >> 2),
754         0x00000000,
755         (0x0000 << 16) | (0x30f00 >> 2),
756         0x00000000,
757         (0x0000 << 16) | (0x30f04 >> 2),
758         0x00000000,
759         (0x0000 << 16) | (0x30f08 >> 2),
760         0x00000000,
761         (0x0000 << 16) | (0x30f0c >> 2),
762         0x00000000,
763         (0x0600 << 16) | (0x9b7c >> 2),
764         0x00000000,
765         (0x0e00 << 16) | (0x8a14 >> 2),
766         0x00000000,
767         (0x0e00 << 16) | (0x8a18 >> 2),
768         0x00000000,
769         (0x0600 << 16) | (0x30a00 >> 2),
770         0x00000000,
771         (0x0e00 << 16) | (0x8bf0 >> 2),
772         0x00000000,
773         (0x0e00 << 16) | (0x8bcc >> 2),
774         0x00000000,
775         (0x0e00 << 16) | (0x8b24 >> 2),
776         0x00000000,
777         (0x0e00 << 16) | (0x30a04 >> 2),
778         0x00000000,
779         (0x0600 << 16) | (0x30a10 >> 2),
780         0x00000000,
781         (0x0600 << 16) | (0x30a14 >> 2),
782         0x00000000,
783         (0x0600 << 16) | (0x30a18 >> 2),
784         0x00000000,
785         (0x0600 << 16) | (0x30a2c >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc700 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc704 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc708 >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc768 >> 2),
794         0x00000000,
795         (0x0400 << 16) | (0xc770 >> 2),
796         0x00000000,
797         (0x0400 << 16) | (0xc774 >> 2),
798         0x00000000,
799         (0x0400 << 16) | (0xc798 >> 2),
800         0x00000000,
801         (0x0400 << 16) | (0xc79c >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0x9100 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0x3c010 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0x8c00 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0x8c04 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0x8c20 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x8c38 >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0x8c3c >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0xae00 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x9604 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0xac08 >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0xac0c >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0xac10 >> 2),
826         0x00000000,
827         (0x0e00 << 16) | (0xac14 >> 2),
828         0x00000000,
829         (0x0e00 << 16) | (0xac58 >> 2),
830         0x00000000,
831         (0x0e00 << 16) | (0xac68 >> 2),
832         0x00000000,
833         (0x0e00 << 16) | (0xac6c >> 2),
834         0x00000000,
835         (0x0e00 << 16) | (0xac70 >> 2),
836         0x00000000,
837         (0x0e00 << 16) | (0xac74 >> 2),
838         0x00000000,
839         (0x0e00 << 16) | (0xac78 >> 2),
840         0x00000000,
841         (0x0e00 << 16) | (0xac7c >> 2),
842         0x00000000,
843         (0x0e00 << 16) | (0xac80 >> 2),
844         0x00000000,
845         (0x0e00 << 16) | (0xac84 >> 2),
846         0x00000000,
847         (0x0e00 << 16) | (0xac88 >> 2),
848         0x00000000,
849         (0x0e00 << 16) | (0xac8c >> 2),
850         0x00000000,
851         (0x0e00 << 16) | (0x970c >> 2),
852         0x00000000,
853         (0x0e00 << 16) | (0x9714 >> 2),
854         0x00000000,
855         (0x0e00 << 16) | (0x9718 >> 2),
856         0x00000000,
857         (0x0e00 << 16) | (0x971c >> 2),
858         0x00000000,
859         (0x0e00 << 16) | (0x31068 >> 2),
860         0x00000000,
861         (0x4e00 << 16) | (0x31068 >> 2),
862         0x00000000,
863         (0x5e00 << 16) | (0x31068 >> 2),
864         0x00000000,
865         (0x6e00 << 16) | (0x31068 >> 2),
866         0x00000000,
867         (0x7e00 << 16) | (0x31068 >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0xcd10 >> 2),
870         0x00000000,
871         (0x0e00 << 16) | (0xcd14 >> 2),
872         0x00000000,
873         (0x0e00 << 16) | (0x88b0 >> 2),
874         0x00000000,
875         (0x0e00 << 16) | (0x88b4 >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x88b8 >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0x88bc >> 2),
880         0x00000000,
881         (0x0400 << 16) | (0x89c0 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x88c4 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x88c8 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x88d0 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x88d4 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x88d8 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0x8980 >> 2),
894         0x00000000,
895         (0x0e00 << 16) | (0x30938 >> 2),
896         0x00000000,
897         (0x0e00 << 16) | (0x3093c >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0x30940 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0x89a0 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0x30900 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0x30904 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0x89b4 >> 2),
908         0x00000000,
909         (0x0e00 << 16) | (0x3e1fc >> 2),
910         0x00000000,
911         (0x0e00 << 16) | (0x3c210 >> 2),
912         0x00000000,
913         (0x0e00 << 16) | (0x3c214 >> 2),
914         0x00000000,
915         (0x0e00 << 16) | (0x3c218 >> 2),
916         0x00000000,
917         (0x0e00 << 16) | (0x8904 >> 2),
918         0x00000000,
919         0x5,
920         (0x0e00 << 16) | (0x8c28 >> 2),
921         (0x0e00 << 16) | (0x8c2c >> 2),
922         (0x0e00 << 16) | (0x8c30 >> 2),
923         (0x0e00 << 16) | (0x8c34 >> 2),
924         (0x0e00 << 16) | (0x9600 >> 2),
925 };
926
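/*
 * The golden/clock-gating tables below are consumed by
 * radeon_program_register_sequence() as {offset, mask, value} triplets:
 * roughly, an all-ones mask means the value is written to the register
 * directly, otherwise the masked bits are cleared and the value is OR'd
 * in (read-modify-write).
 */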
927 static const u32 bonaire_golden_spm_registers[] =
928 {
929         0x30800, 0xe0ffffff, 0xe0000000
930 };
931
932 static const u32 bonaire_golden_common_registers[] =
933 {
934         0xc770, 0xffffffff, 0x00000800,
935         0xc774, 0xffffffff, 0x00000800,
936         0xc798, 0xffffffff, 0x00007fbf,
937         0xc79c, 0xffffffff, 0x00007faf
938 };
939
940 static const u32 bonaire_golden_registers[] =
941 {
942         0x3354, 0x00000333, 0x00000333,
943         0x3350, 0x000c0fc0, 0x00040200,
944         0x9a10, 0x00010000, 0x00058208,
945         0x3c000, 0xffff1fff, 0x00140000,
946         0x3c200, 0xfdfc0fff, 0x00000100,
947         0x3c234, 0x40000000, 0x40000200,
948         0x9830, 0xffffffff, 0x00000000,
949         0x9834, 0xf00fffff, 0x00000400,
950         0x9838, 0x0002021c, 0x00020200,
951         0xc78, 0x00000080, 0x00000000,
952         0x5bb0, 0x000000f0, 0x00000070,
953         0x5bc0, 0xf0311fff, 0x80300000,
954         0x98f8, 0x73773777, 0x12010001,
955         0x350c, 0x00810000, 0x408af000,
956         0x7030, 0x31000111, 0x00000011,
957         0x2f48, 0x73773777, 0x12010001,
958         0x220c, 0x00007fb6, 0x0021a1b1,
959         0x2210, 0x00007fb6, 0x002021b1,
960         0x2180, 0x00007fb6, 0x00002191,
961         0x2218, 0x00007fb6, 0x002121b1,
962         0x221c, 0x00007fb6, 0x002021b1,
963         0x21dc, 0x00007fb6, 0x00002191,
964         0x21e0, 0x00007fb6, 0x00002191,
965         0x3628, 0x0000003f, 0x0000000a,
966         0x362c, 0x0000003f, 0x0000000a,
967         0x2ae4, 0x00073ffe, 0x000022a2,
968         0x240c, 0x000007ff, 0x00000000,
969         0x8a14, 0xf000003f, 0x00000007,
970         0x8bf0, 0x00002001, 0x00000001,
971         0x8b24, 0xffffffff, 0x00ffffff,
972         0x30a04, 0x0000ff0f, 0x00000000,
973         0x28a4c, 0x07ffffff, 0x06000000,
974         0x4d8, 0x00000fff, 0x00000100,
975         0x3e78, 0x00000001, 0x00000002,
976         0x9100, 0x03000000, 0x0362c688,
977         0x8c00, 0x000000ff, 0x00000001,
978         0xe40, 0x00001fff, 0x00001fff,
979         0x9060, 0x0000007f, 0x00000020,
980         0x9508, 0x00010000, 0x00010000,
981         0xac14, 0x000003ff, 0x000000f3,
982         0xac0c, 0xffffffff, 0x00001032
983 };
984
985 static const u32 bonaire_mgcg_cgcg_init[] =
986 {
987         0xc420, 0xffffffff, 0xfffffffc,
988         0x30800, 0xffffffff, 0xe0000000,
989         0x3c2a0, 0xffffffff, 0x00000100,
990         0x3c208, 0xffffffff, 0x00000100,
991         0x3c2c0, 0xffffffff, 0xc0000100,
992         0x3c2c8, 0xffffffff, 0xc0000100,
993         0x3c2c4, 0xffffffff, 0xc0000100,
994         0x55e4, 0xffffffff, 0x00600100,
995         0x3c280, 0xffffffff, 0x00000100,
996         0x3c214, 0xffffffff, 0x06000100,
997         0x3c220, 0xffffffff, 0x00000100,
998         0x3c218, 0xffffffff, 0x06000100,
999         0x3c204, 0xffffffff, 0x00000100,
1000         0x3c2e0, 0xffffffff, 0x00000100,
1001         0x3c224, 0xffffffff, 0x00000100,
1002         0x3c200, 0xffffffff, 0x00000100,
1003         0x3c230, 0xffffffff, 0x00000100,
1004         0x3c234, 0xffffffff, 0x00000100,
1005         0x3c250, 0xffffffff, 0x00000100,
1006         0x3c254, 0xffffffff, 0x00000100,
1007         0x3c258, 0xffffffff, 0x00000100,
1008         0x3c25c, 0xffffffff, 0x00000100,
1009         0x3c260, 0xffffffff, 0x00000100,
1010         0x3c27c, 0xffffffff, 0x00000100,
1011         0x3c278, 0xffffffff, 0x00000100,
1012         0x3c210, 0xffffffff, 0x06000100,
1013         0x3c290, 0xffffffff, 0x00000100,
1014         0x3c274, 0xffffffff, 0x00000100,
1015         0x3c2b4, 0xffffffff, 0x00000100,
1016         0x3c2b0, 0xffffffff, 0x00000100,
1017         0x3c270, 0xffffffff, 0x00000100,
1018         0x30800, 0xffffffff, 0xe0000000,
1019         0x3c020, 0xffffffff, 0x00010000,
1020         0x3c024, 0xffffffff, 0x00030002,
1021         0x3c028, 0xffffffff, 0x00040007,
1022         0x3c02c, 0xffffffff, 0x00060005,
1023         0x3c030, 0xffffffff, 0x00090008,
1024         0x3c034, 0xffffffff, 0x00010000,
1025         0x3c038, 0xffffffff, 0x00030002,
1026         0x3c03c, 0xffffffff, 0x00040007,
1027         0x3c040, 0xffffffff, 0x00060005,
1028         0x3c044, 0xffffffff, 0x00090008,
1029         0x3c048, 0xffffffff, 0x00010000,
1030         0x3c04c, 0xffffffff, 0x00030002,
1031         0x3c050, 0xffffffff, 0x00040007,
1032         0x3c054, 0xffffffff, 0x00060005,
1033         0x3c058, 0xffffffff, 0x00090008,
1034         0x3c05c, 0xffffffff, 0x00010000,
1035         0x3c060, 0xffffffff, 0x00030002,
1036         0x3c064, 0xffffffff, 0x00040007,
1037         0x3c068, 0xffffffff, 0x00060005,
1038         0x3c06c, 0xffffffff, 0x00090008,
1039         0x3c070, 0xffffffff, 0x00010000,
1040         0x3c074, 0xffffffff, 0x00030002,
1041         0x3c078, 0xffffffff, 0x00040007,
1042         0x3c07c, 0xffffffff, 0x00060005,
1043         0x3c080, 0xffffffff, 0x00090008,
1044         0x3c084, 0xffffffff, 0x00010000,
1045         0x3c088, 0xffffffff, 0x00030002,
1046         0x3c08c, 0xffffffff, 0x00040007,
1047         0x3c090, 0xffffffff, 0x00060005,
1048         0x3c094, 0xffffffff, 0x00090008,
1049         0x3c098, 0xffffffff, 0x00010000,
1050         0x3c09c, 0xffffffff, 0x00030002,
1051         0x3c0a0, 0xffffffff, 0x00040007,
1052         0x3c0a4, 0xffffffff, 0x00060005,
1053         0x3c0a8, 0xffffffff, 0x00090008,
1054         0x3c000, 0xffffffff, 0x96e00200,
1055         0x8708, 0xffffffff, 0x00900100,
1056         0xc424, 0xffffffff, 0x0020003f,
1057         0x38, 0xffffffff, 0x0140001c,
1058         0x3c, 0x000f0000, 0x000f0000,
1059         0x220, 0xffffffff, 0xC060000C,
1060         0x224, 0xc0000fff, 0x00000100,
1061         0xf90, 0xffffffff, 0x00000100,
1062         0xf98, 0x00000101, 0x00000000,
1063         0x20a8, 0xffffffff, 0x00000104,
1064         0x55e4, 0xff000fff, 0x00000100,
1065         0x30cc, 0xc0000fff, 0x00000104,
1066         0xc1e4, 0x00000001, 0x00000001,
1067         0xd00c, 0xff000ff0, 0x00000100,
1068         0xd80c, 0xff000ff0, 0x00000100
1069 };
1070
1071 static const u32 spectre_golden_spm_registers[] =
1072 {
1073         0x30800, 0xe0ffffff, 0xe0000000
1074 };
1075
1076 static const u32 spectre_golden_common_registers[] =
1077 {
1078         0xc770, 0xffffffff, 0x00000800,
1079         0xc774, 0xffffffff, 0x00000800,
1080         0xc798, 0xffffffff, 0x00007fbf,
1081         0xc79c, 0xffffffff, 0x00007faf
1082 };
1083
1084 static const u32 spectre_golden_registers[] =
1085 {
1086         0x3c000, 0xffff1fff, 0x96940200,
1087         0x3c00c, 0xffff0001, 0xff000000,
1088         0x3c200, 0xfffc0fff, 0x00000100,
1089         0x6ed8, 0x00010101, 0x00010000,
1090         0x9834, 0xf00fffff, 0x00000400,
1091         0x9838, 0xfffffffc, 0x00020200,
1092         0x5bb0, 0x000000f0, 0x00000070,
1093         0x5bc0, 0xf0311fff, 0x80300000,
1094         0x98f8, 0x73773777, 0x12010001,
1095         0x9b7c, 0x00ff0000, 0x00fc0000,
1096         0x2f48, 0x73773777, 0x12010001,
1097         0x8a14, 0xf000003f, 0x00000007,
1098         0x8b24, 0xffffffff, 0x00ffffff,
1099         0x28350, 0x3f3f3fff, 0x00000082,
1100         0x28354, 0x0000003f, 0x00000000,
1101         0x3e78, 0x00000001, 0x00000002,
1102         0x913c, 0xffff03df, 0x00000004,
1103         0xc768, 0x00000008, 0x00000008,
1104         0x8c00, 0x000008ff, 0x00000800,
1105         0x9508, 0x00010000, 0x00010000,
1106         0xac0c, 0xffffffff, 0x54763210,
1107         0x214f8, 0x01ff01ff, 0x00000002,
1108         0x21498, 0x007ff800, 0x00200000,
1109         0x2015c, 0xffffffff, 0x00000f40,
1110         0x30934, 0xffffffff, 0x00000001
1111 };
1112
1113 static const u32 spectre_mgcg_cgcg_init[] =
1114 {
1115         0xc420, 0xffffffff, 0xfffffffc,
1116         0x30800, 0xffffffff, 0xe0000000,
1117         0x3c2a0, 0xffffffff, 0x00000100,
1118         0x3c208, 0xffffffff, 0x00000100,
1119         0x3c2c0, 0xffffffff, 0x00000100,
1120         0x3c2c8, 0xffffffff, 0x00000100,
1121         0x3c2c4, 0xffffffff, 0x00000100,
1122         0x55e4, 0xffffffff, 0x00600100,
1123         0x3c280, 0xffffffff, 0x00000100,
1124         0x3c214, 0xffffffff, 0x06000100,
1125         0x3c220, 0xffffffff, 0x00000100,
1126         0x3c218, 0xffffffff, 0x06000100,
1127         0x3c204, 0xffffffff, 0x00000100,
1128         0x3c2e0, 0xffffffff, 0x00000100,
1129         0x3c224, 0xffffffff, 0x00000100,
1130         0x3c200, 0xffffffff, 0x00000100,
1131         0x3c230, 0xffffffff, 0x00000100,
1132         0x3c234, 0xffffffff, 0x00000100,
1133         0x3c250, 0xffffffff, 0x00000100,
1134         0x3c254, 0xffffffff, 0x00000100,
1135         0x3c258, 0xffffffff, 0x00000100,
1136         0x3c25c, 0xffffffff, 0x00000100,
1137         0x3c260, 0xffffffff, 0x00000100,
1138         0x3c27c, 0xffffffff, 0x00000100,
1139         0x3c278, 0xffffffff, 0x00000100,
1140         0x3c210, 0xffffffff, 0x06000100,
1141         0x3c290, 0xffffffff, 0x00000100,
1142         0x3c274, 0xffffffff, 0x00000100,
1143         0x3c2b4, 0xffffffff, 0x00000100,
1144         0x3c2b0, 0xffffffff, 0x00000100,
1145         0x3c270, 0xffffffff, 0x00000100,
1146         0x30800, 0xffffffff, 0xe0000000,
1147         0x3c020, 0xffffffff, 0x00010000,
1148         0x3c024, 0xffffffff, 0x00030002,
1149         0x3c028, 0xffffffff, 0x00040007,
1150         0x3c02c, 0xffffffff, 0x00060005,
1151         0x3c030, 0xffffffff, 0x00090008,
1152         0x3c034, 0xffffffff, 0x00010000,
1153         0x3c038, 0xffffffff, 0x00030002,
1154         0x3c03c, 0xffffffff, 0x00040007,
1155         0x3c040, 0xffffffff, 0x00060005,
1156         0x3c044, 0xffffffff, 0x00090008,
1157         0x3c048, 0xffffffff, 0x00010000,
1158         0x3c04c, 0xffffffff, 0x00030002,
1159         0x3c050, 0xffffffff, 0x00040007,
1160         0x3c054, 0xffffffff, 0x00060005,
1161         0x3c058, 0xffffffff, 0x00090008,
1162         0x3c05c, 0xffffffff, 0x00010000,
1163         0x3c060, 0xffffffff, 0x00030002,
1164         0x3c064, 0xffffffff, 0x00040007,
1165         0x3c068, 0xffffffff, 0x00060005,
1166         0x3c06c, 0xffffffff, 0x00090008,
1167         0x3c070, 0xffffffff, 0x00010000,
1168         0x3c074, 0xffffffff, 0x00030002,
1169         0x3c078, 0xffffffff, 0x00040007,
1170         0x3c07c, 0xffffffff, 0x00060005,
1171         0x3c080, 0xffffffff, 0x00090008,
1172         0x3c084, 0xffffffff, 0x00010000,
1173         0x3c088, 0xffffffff, 0x00030002,
1174         0x3c08c, 0xffffffff, 0x00040007,
1175         0x3c090, 0xffffffff, 0x00060005,
1176         0x3c094, 0xffffffff, 0x00090008,
1177         0x3c098, 0xffffffff, 0x00010000,
1178         0x3c09c, 0xffffffff, 0x00030002,
1179         0x3c0a0, 0xffffffff, 0x00040007,
1180         0x3c0a4, 0xffffffff, 0x00060005,
1181         0x3c0a8, 0xffffffff, 0x00090008,
1182         0x3c0ac, 0xffffffff, 0x00010000,
1183         0x3c0b0, 0xffffffff, 0x00030002,
1184         0x3c0b4, 0xffffffff, 0x00040007,
1185         0x3c0b8, 0xffffffff, 0x00060005,
1186         0x3c0bc, 0xffffffff, 0x00090008,
1187         0x3c000, 0xffffffff, 0x96e00200,
1188         0x8708, 0xffffffff, 0x00900100,
1189         0xc424, 0xffffffff, 0x0020003f,
1190         0x38, 0xffffffff, 0x0140001c,
1191         0x3c, 0x000f0000, 0x000f0000,
1192         0x220, 0xffffffff, 0xC060000C,
1193         0x224, 0xc0000fff, 0x00000100,
1194         0xf90, 0xffffffff, 0x00000100,
1195         0xf98, 0x00000101, 0x00000000,
1196         0x20a8, 0xffffffff, 0x00000104,
1197         0x55e4, 0xff000fff, 0x00000100,
1198         0x30cc, 0xc0000fff, 0x00000104,
1199         0xc1e4, 0x00000001, 0x00000001,
1200         0xd00c, 0xff000ff0, 0x00000100,
1201         0xd80c, 0xff000ff0, 0x00000100
1202 };
1203
1204 static const u32 kalindi_golden_spm_registers[] =
1205 {
1206         0x30800, 0xe0ffffff, 0xe0000000
1207 };
1208
1209 static const u32 kalindi_golden_common_registers[] =
1210 {
1211         0xc770, 0xffffffff, 0x00000800,
1212         0xc774, 0xffffffff, 0x00000800,
1213         0xc798, 0xffffffff, 0x00007fbf,
1214         0xc79c, 0xffffffff, 0x00007faf
1215 };
1216
1217 static const u32 kalindi_golden_registers[] =
1218 {
1219         0x3c000, 0xffffdfff, 0x6e944040,
1220         0x55e4, 0xff607fff, 0xfc000100,
1221         0x3c220, 0xff000fff, 0x00000100,
1222         0x3c224, 0xff000fff, 0x00000100,
1223         0x3c200, 0xfffc0fff, 0x00000100,
1224         0x6ed8, 0x00010101, 0x00010000,
1225         0x9830, 0xffffffff, 0x00000000,
1226         0x9834, 0xf00fffff, 0x00000400,
1227         0x5bb0, 0x000000f0, 0x00000070,
1228         0x5bc0, 0xf0311fff, 0x80300000,
1229         0x98f8, 0x73773777, 0x12010001,
1230         0x98fc, 0xffffffff, 0x00000010,
1231         0x9b7c, 0x00ff0000, 0x00fc0000,
1232         0x8030, 0x00001f0f, 0x0000100a,
1233         0x2f48, 0x73773777, 0x12010001,
1234         0x2408, 0x000fffff, 0x000c007f,
1235         0x8a14, 0xf000003f, 0x00000007,
1236         0x8b24, 0x3fff3fff, 0x00ffcfff,
1237         0x30a04, 0x0000ff0f, 0x00000000,
1238         0x28a4c, 0x07ffffff, 0x06000000,
1239         0x4d8, 0x00000fff, 0x00000100,
1240         0x3e78, 0x00000001, 0x00000002,
1241         0xc768, 0x00000008, 0x00000008,
1242         0x8c00, 0x000000ff, 0x00000003,
1243         0x214f8, 0x01ff01ff, 0x00000002,
1244         0x21498, 0x007ff800, 0x00200000,
1245         0x2015c, 0xffffffff, 0x00000f40,
1246         0x88c4, 0x001f3ae3, 0x00000082,
1247         0x88d4, 0x0000001f, 0x00000010,
1248         0x30934, 0xffffffff, 0x00000000
1249 };
1250
1251 static const u32 kalindi_mgcg_cgcg_init[] =
1252 {
1253         0xc420, 0xffffffff, 0xfffffffc,
1254         0x30800, 0xffffffff, 0xe0000000,
1255         0x3c2a0, 0xffffffff, 0x00000100,
1256         0x3c208, 0xffffffff, 0x00000100,
1257         0x3c2c0, 0xffffffff, 0x00000100,
1258         0x3c2c8, 0xffffffff, 0x00000100,
1259         0x3c2c4, 0xffffffff, 0x00000100,
1260         0x55e4, 0xffffffff, 0x00600100,
1261         0x3c280, 0xffffffff, 0x00000100,
1262         0x3c214, 0xffffffff, 0x06000100,
1263         0x3c220, 0xffffffff, 0x00000100,
1264         0x3c218, 0xffffffff, 0x06000100,
1265         0x3c204, 0xffffffff, 0x00000100,
1266         0x3c2e0, 0xffffffff, 0x00000100,
1267         0x3c224, 0xffffffff, 0x00000100,
1268         0x3c200, 0xffffffff, 0x00000100,
1269         0x3c230, 0xffffffff, 0x00000100,
1270         0x3c234, 0xffffffff, 0x00000100,
1271         0x3c250, 0xffffffff, 0x00000100,
1272         0x3c254, 0xffffffff, 0x00000100,
1273         0x3c258, 0xffffffff, 0x00000100,
1274         0x3c25c, 0xffffffff, 0x00000100,
1275         0x3c260, 0xffffffff, 0x00000100,
1276         0x3c27c, 0xffffffff, 0x00000100,
1277         0x3c278, 0xffffffff, 0x00000100,
1278         0x3c210, 0xffffffff, 0x06000100,
1279         0x3c290, 0xffffffff, 0x00000100,
1280         0x3c274, 0xffffffff, 0x00000100,
1281         0x3c2b4, 0xffffffff, 0x00000100,
1282         0x3c2b0, 0xffffffff, 0x00000100,
1283         0x3c270, 0xffffffff, 0x00000100,
1284         0x30800, 0xffffffff, 0xe0000000,
1285         0x3c020, 0xffffffff, 0x00010000,
1286         0x3c024, 0xffffffff, 0x00030002,
1287         0x3c028, 0xffffffff, 0x00040007,
1288         0x3c02c, 0xffffffff, 0x00060005,
1289         0x3c030, 0xffffffff, 0x00090008,
1290         0x3c034, 0xffffffff, 0x00010000,
1291         0x3c038, 0xffffffff, 0x00030002,
1292         0x3c03c, 0xffffffff, 0x00040007,
1293         0x3c040, 0xffffffff, 0x00060005,
1294         0x3c044, 0xffffffff, 0x00090008,
1295         0x3c000, 0xffffffff, 0x96e00200,
1296         0x8708, 0xffffffff, 0x00900100,
1297         0xc424, 0xffffffff, 0x0020003f,
1298         0x38, 0xffffffff, 0x0140001c,
1299         0x3c, 0x000f0000, 0x000f0000,
1300         0x220, 0xffffffff, 0xC060000C,
1301         0x224, 0xc0000fff, 0x00000100,
1302         0x20a8, 0xffffffff, 0x00000104,
1303         0x55e4, 0xff000fff, 0x00000100,
1304         0x30cc, 0xc0000fff, 0x00000104,
1305         0xc1e4, 0x00000001, 0x00000001,
1306         0xd00c, 0xff000ff0, 0x00000100,
1307         0xd80c, 0xff000ff0, 0x00000100
1308 };
1309
1310 static const u32 hawaii_golden_spm_registers[] =
1311 {
1312         0x30800, 0xe0ffffff, 0xe0000000
1313 };
1314
1315 static const u32 hawaii_golden_common_registers[] =
1316 {
1317         0x30800, 0xffffffff, 0xe0000000,
1318         0x28350, 0xffffffff, 0x3a00161a,
1319         0x28354, 0xffffffff, 0x0000002e,
1320         0x9a10, 0xffffffff, 0x00018208,
1321         0x98f8, 0xffffffff, 0x12011003
1322 };
1323
1324 static const u32 hawaii_golden_registers[] =
1325 {
1326         0x3354, 0x00000333, 0x00000333,
1327         0x9a10, 0x00010000, 0x00058208,
1328         0x9830, 0xffffffff, 0x00000000,
1329         0x9834, 0xf00fffff, 0x00000400,
1330         0x9838, 0x0002021c, 0x00020200,
1331         0xc78, 0x00000080, 0x00000000,
1332         0x5bb0, 0x000000f0, 0x00000070,
1333         0x5bc0, 0xf0311fff, 0x80300000,
1334         0x350c, 0x00810000, 0x408af000,
1335         0x7030, 0x31000111, 0x00000011,
1336         0x2f48, 0x73773777, 0x12010001,
1337         0x2120, 0x0000007f, 0x0000001b,
1338         0x21dc, 0x00007fb6, 0x00002191,
1339         0x3628, 0x0000003f, 0x0000000a,
1340         0x362c, 0x0000003f, 0x0000000a,
1341         0x2ae4, 0x00073ffe, 0x000022a2,
1342         0x240c, 0x000007ff, 0x00000000,
1343         0x8bf0, 0x00002001, 0x00000001,
1344         0x8b24, 0xffffffff, 0x00ffffff,
1345         0x30a04, 0x0000ff0f, 0x00000000,
1346         0x28a4c, 0x07ffffff, 0x06000000,
1347         0x3e78, 0x00000001, 0x00000002,
1348         0xc768, 0x00000008, 0x00000008,
1349         0xc770, 0x00000f00, 0x00000800,
1350         0xc774, 0x00000f00, 0x00000800,
1351         0xc798, 0x00ffffff, 0x00ff7fbf,
1352         0xc79c, 0x00ffffff, 0x00ff7faf,
1353         0x8c00, 0x000000ff, 0x00000800,
1354         0xe40, 0x00001fff, 0x00001fff,
1355         0x9060, 0x0000007f, 0x00000020,
1356         0x9508, 0x00010000, 0x00010000,
1357         0xae00, 0x00100000, 0x000ff07c,
1358         0xac14, 0x000003ff, 0x0000000f,
1359         0xac10, 0xffffffff, 0x7564fdec,
1360         0xac0c, 0xffffffff, 0x3120b9a8,
1361         0xac08, 0x20000000, 0x0f9c0000
1362 };
1363
1364 static const u32 hawaii_mgcg_cgcg_init[] =
1365 {
1366         0xc420, 0xffffffff, 0xfffffffd,
1367         0x30800, 0xffffffff, 0xe0000000,
1368         0x3c2a0, 0xffffffff, 0x00000100,
1369         0x3c208, 0xffffffff, 0x00000100,
1370         0x3c2c0, 0xffffffff, 0x00000100,
1371         0x3c2c8, 0xffffffff, 0x00000100,
1372         0x3c2c4, 0xffffffff, 0x00000100,
1373         0x55e4, 0xffffffff, 0x00200100,
1374         0x3c280, 0xffffffff, 0x00000100,
1375         0x3c214, 0xffffffff, 0x06000100,
1376         0x3c220, 0xffffffff, 0x00000100,
1377         0x3c218, 0xffffffff, 0x06000100,
1378         0x3c204, 0xffffffff, 0x00000100,
1379         0x3c2e0, 0xffffffff, 0x00000100,
1380         0x3c224, 0xffffffff, 0x00000100,
1381         0x3c200, 0xffffffff, 0x00000100,
1382         0x3c230, 0xffffffff, 0x00000100,
1383         0x3c234, 0xffffffff, 0x00000100,
1384         0x3c250, 0xffffffff, 0x00000100,
1385         0x3c254, 0xffffffff, 0x00000100,
1386         0x3c258, 0xffffffff, 0x00000100,
1387         0x3c25c, 0xffffffff, 0x00000100,
1388         0x3c260, 0xffffffff, 0x00000100,
1389         0x3c27c, 0xffffffff, 0x00000100,
1390         0x3c278, 0xffffffff, 0x00000100,
1391         0x3c210, 0xffffffff, 0x06000100,
1392         0x3c290, 0xffffffff, 0x00000100,
1393         0x3c274, 0xffffffff, 0x00000100,
1394         0x3c2b4, 0xffffffff, 0x00000100,
1395         0x3c2b0, 0xffffffff, 0x00000100,
1396         0x3c270, 0xffffffff, 0x00000100,
1397         0x30800, 0xffffffff, 0xe0000000,
1398         0x3c020, 0xffffffff, 0x00010000,
1399         0x3c024, 0xffffffff, 0x00030002,
1400         0x3c028, 0xffffffff, 0x00040007,
1401         0x3c02c, 0xffffffff, 0x00060005,
1402         0x3c030, 0xffffffff, 0x00090008,
1403         0x3c034, 0xffffffff, 0x00010000,
1404         0x3c038, 0xffffffff, 0x00030002,
1405         0x3c03c, 0xffffffff, 0x00040007,
1406         0x3c040, 0xffffffff, 0x00060005,
1407         0x3c044, 0xffffffff, 0x00090008,
1408         0x3c048, 0xffffffff, 0x00010000,
1409         0x3c04c, 0xffffffff, 0x00030002,
1410         0x3c050, 0xffffffff, 0x00040007,
1411         0x3c054, 0xffffffff, 0x00060005,
1412         0x3c058, 0xffffffff, 0x00090008,
1413         0x3c05c, 0xffffffff, 0x00010000,
1414         0x3c060, 0xffffffff, 0x00030002,
1415         0x3c064, 0xffffffff, 0x00040007,
1416         0x3c068, 0xffffffff, 0x00060005,
1417         0x3c06c, 0xffffffff, 0x00090008,
1418         0x3c070, 0xffffffff, 0x00010000,
1419         0x3c074, 0xffffffff, 0x00030002,
1420         0x3c078, 0xffffffff, 0x00040007,
1421         0x3c07c, 0xffffffff, 0x00060005,
1422         0x3c080, 0xffffffff, 0x00090008,
1423         0x3c084, 0xffffffff, 0x00010000,
1424         0x3c088, 0xffffffff, 0x00030002,
1425         0x3c08c, 0xffffffff, 0x00040007,
1426         0x3c090, 0xffffffff, 0x00060005,
1427         0x3c094, 0xffffffff, 0x00090008,
1428         0x3c098, 0xffffffff, 0x00010000,
1429         0x3c09c, 0xffffffff, 0x00030002,
1430         0x3c0a0, 0xffffffff, 0x00040007,
1431         0x3c0a4, 0xffffffff, 0x00060005,
1432         0x3c0a8, 0xffffffff, 0x00090008,
1433         0x3c0ac, 0xffffffff, 0x00010000,
1434         0x3c0b0, 0xffffffff, 0x00030002,
1435         0x3c0b4, 0xffffffff, 0x00040007,
1436         0x3c0b8, 0xffffffff, 0x00060005,
1437         0x3c0bc, 0xffffffff, 0x00090008,
1438         0x3c0c0, 0xffffffff, 0x00010000,
1439         0x3c0c4, 0xffffffff, 0x00030002,
1440         0x3c0c8, 0xffffffff, 0x00040007,
1441         0x3c0cc, 0xffffffff, 0x00060005,
1442         0x3c0d0, 0xffffffff, 0x00090008,
1443         0x3c0d4, 0xffffffff, 0x00010000,
1444         0x3c0d8, 0xffffffff, 0x00030002,
1445         0x3c0dc, 0xffffffff, 0x00040007,
1446         0x3c0e0, 0xffffffff, 0x00060005,
1447         0x3c0e4, 0xffffffff, 0x00090008,
1448         0x3c0e8, 0xffffffff, 0x00010000,
1449         0x3c0ec, 0xffffffff, 0x00030002,
1450         0x3c0f0, 0xffffffff, 0x00040007,
1451         0x3c0f4, 0xffffffff, 0x00060005,
1452         0x3c0f8, 0xffffffff, 0x00090008,
1453         0xc318, 0xffffffff, 0x00020200,
1454         0x3350, 0xffffffff, 0x00000200,
1455         0x15c0, 0xffffffff, 0x00000400,
1456         0x55e8, 0xffffffff, 0x00000000,
1457         0x2f50, 0xffffffff, 0x00000902,
1458         0x3c000, 0xffffffff, 0x96940200,
1459         0x8708, 0xffffffff, 0x00900100,
1460         0xc424, 0xffffffff, 0x0020003f,
1461         0x38, 0xffffffff, 0x0140001c,
1462         0x3c, 0x000f0000, 0x000f0000,
1463         0x220, 0xffffffff, 0xc060000c,
1464         0x224, 0xc0000fff, 0x00000100,
1465         0xf90, 0xffffffff, 0x00000100,
1466         0xf98, 0x00000101, 0x00000000,
1467         0x20a8, 0xffffffff, 0x00000104,
1468         0x55e4, 0xff000fff, 0x00000100,
1469         0x30cc, 0xc0000fff, 0x00000104,
1470         0xc1e4, 0x00000001, 0x00000001,
1471         0xd00c, 0xff000ff0, 0x00000100,
1472         0xd80c, 0xff000ff0, 0x00000100
1473 };
1474
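/**
 * cik_init_golden_registers - program the "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the per-family clock gating init, golden, common and SPM
 * register sequences defined above (CIK).
 */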
static void cik_init_golden_registers(struct radeon_device *rdev)
{
        switch (rdev->family) {
        case CHIP_BONAIRE:
                radeon_program_register_sequence(rdev,
                                                 bonaire_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
                break;
        case CHIP_KABINI:
                radeon_program_register_sequence(rdev,
                                                 kalindi_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
                break;
        case CHIP_KAVERI:
                radeon_program_register_sequence(rdev,
                                                 spectre_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
                break;
        case CHIP_HAWAII:
                radeon_program_register_sequence(rdev,
                                                 hawaii_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 hawaii_golden_registers,
                                                 (const u32)ARRAY_SIZE(hawaii_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 hawaii_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 hawaii_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
                break;
        default:
                break;
        }
}
1538
1539 /**
1540  * cik_get_xclk - get the xclk
1541  *
1542  * @rdev: radeon_device pointer
1543  *
1544  * Returns the reference clock used by the gfx engine
1545  * (CIK).
1546  */
1547 u32 cik_get_xclk(struct radeon_device *rdev)
1548 {
1549         u32 reference_clock = rdev->clock.spll.reference_freq;
1550
1551         if (rdev->flags & RADEON_IS_IGP) {
1552                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1553                         return reference_clock / 2;
1554         } else {
1555                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1556                         return reference_clock / 4;
1557         }
1558         return reference_clock;
1559 }
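/*
 * Note (reader aid, stated as an assumption): the SPLL reference frequency
 * returned here comes from the ATOM firmware info table and is expressed in
 * 10 kHz units as parsed by the driver; callers scale it as needed.
 */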
1560
1561 /**
1562  * cik_mm_rdoorbell - read a doorbell dword
1563  *
1564  * @rdev: radeon_device pointer
1565  * @index: doorbell index
1566  *
1567  * Returns the value in the doorbell aperture at the
1568  * requested doorbell index (CIK).
1569  */
1570 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1571 {
1572         if (index < rdev->doorbell.num_doorbells) {
1573                 return readl(rdev->doorbell.ptr + index);
1574         } else {
1575                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1576                 return 0;
1577         }
1578 }
1579
1580 /**
1581  * cik_mm_wdoorbell - write a doorbell dword
1582  *
1583  * @rdev: radeon_device pointer
1584  * @index: doorbell index
1585  * @v: value to write
1586  *
1587  * Writes @v to the doorbell aperture at the
1588  * requested doorbell index (CIK).
1589  */
1590 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1591 {
1592         if (index < rdev->doorbell.num_doorbells) {
1593                 writel(v, rdev->doorbell.ptr + index);
1594         } else {
1595                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1596         }
1597 }
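/*
 * These accessors are normally reached through the RDOORBELL32()/
 * WDOORBELL32() wrappers in radeon.h.  A minimal usage sketch (hypothetical
 * ring fields, mirroring how the compute rings update their pointers later
 * in this file):
 *
 *	WDOORBELL32(ring->doorbell_index, ring->wptr);
 *	rptr = RDOORBELL32(ring->doorbell_index);
 */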
1598
1599 #define BONAIRE_IO_MC_REGS_SIZE 36
1600
1601 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1602 {
1603         {0x00000070, 0x04400000},
1604         {0x00000071, 0x80c01803},
1605         {0x00000072, 0x00004004},
1606         {0x00000073, 0x00000100},
1607         {0x00000074, 0x00ff0000},
1608         {0x00000075, 0x34000000},
1609         {0x00000076, 0x08000014},
1610         {0x00000077, 0x00cc08ec},
1611         {0x00000078, 0x00000400},
1612         {0x00000079, 0x00000000},
1613         {0x0000007a, 0x04090000},
1614         {0x0000007c, 0x00000000},
1615         {0x0000007e, 0x4408a8e8},
1616         {0x0000007f, 0x00000304},
1617         {0x00000080, 0x00000000},
1618         {0x00000082, 0x00000001},
1619         {0x00000083, 0x00000002},
1620         {0x00000084, 0xf3e4f400},
1621         {0x00000085, 0x052024e3},
1622         {0x00000087, 0x00000000},
1623         {0x00000088, 0x01000000},
1624         {0x0000008a, 0x1c0a0000},
1625         {0x0000008b, 0xff010000},
1626         {0x0000008d, 0xffffefff},
1627         {0x0000008e, 0xfff3efff},
1628         {0x0000008f, 0xfff3efbf},
1629         {0x00000092, 0xf7ffffff},
1630         {0x00000093, 0xffffff7f},
1631         {0x00000095, 0x00101101},
1632         {0x00000096, 0x00000fff},
1633         {0x00000097, 0x00116fff},
1634         {0x00000098, 0x60010000},
1635         {0x00000099, 0x10010000},
1636         {0x0000009a, 0x00006000},
1637         {0x0000009b, 0x00001000},
1638         {0x0000009f, 0x00b48000}
1639 };
1640
1641 #define HAWAII_IO_MC_REGS_SIZE 22
1642
1643 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1644 {
1645         {0x0000007d, 0x40000000},
1646         {0x0000007e, 0x40180304},
1647         {0x0000007f, 0x0000ff00},
1648         {0x00000081, 0x00000000},
1649         {0x00000083, 0x00000800},
1650         {0x00000086, 0x00000000},
1651         {0x00000087, 0x00000100},
1652         {0x00000088, 0x00020100},
1653         {0x00000089, 0x00000000},
1654         {0x0000008b, 0x00040000},
1655         {0x0000008c, 0x00000100},
1656         {0x0000008e, 0xff010000},
1657         {0x00000090, 0xffffefff},
1658         {0x00000091, 0xfff3efff},
1659         {0x00000092, 0xfff3efbf},
1660         {0x00000093, 0xf7ffffff},
1661         {0x00000094, 0xffffff7f},
1662         {0x00000095, 0x00000fff},
1663         {0x00000096, 0x00116fff},
1664         {0x00000097, 0x60010000},
1665         {0x00000098, 0x10010000},
1666         {0x0000009f, 0x00c79000}
1667 };
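/*
 * The *_io_mc_regs tables above are {MC_SEQ_IO_DEBUG index, data} pairs;
 * ci_mc_load_microcode() below writes each index to MC_SEQ_IO_DEBUG_INDEX
 * and the matching data word to MC_SEQ_IO_DEBUG_DATA before uploading the
 * MC ucode itself.
 */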
1668
1669
1670 /**
1671  * cik_srbm_select - select specific register instances
1672  *
1673  * @rdev: radeon_device pointer
1674  * @me: selected ME (micro engine)
1675  * @pipe: pipe
1676  * @queue: queue
1677  * @vmid: VMID
1678  *
1679  * Switches the currently active register instances.  Some
1680  * registers are instanced per VMID, others are instanced per
1681  * me/pipe/queue combination.
1682  */
1683 static void cik_srbm_select(struct radeon_device *rdev,
1684                             u32 me, u32 pipe, u32 queue, u32 vmid)
1685 {
1686         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1687                              MEID(me & 0x3) |
1688                              VMID(vmid & 0xf) |
1689                              QUEUEID(queue & 0x7));
1690         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1691 }
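/*
 * Callers are expected to serialize SRBM selection with rdev->srbm_mutex
 * and to restore the default instance when done.  A sketch of the usual
 * pattern (as used by the compute queue setup code later in this file):
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the per-queue registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */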
1692
1693 /* ucode loading */
1694 /**
1695  * ci_mc_load_microcode - load MC ucode into the hw
1696  *
1697  * @rdev: radeon_device pointer
1698  *
1699  * Load the GDDR MC ucode into the hw (CIK).
1700  * Returns 0 on success, error on failure.
1701  */
1702 int ci_mc_load_microcode(struct radeon_device *rdev)
1703 {
1704         const __be32 *fw_data;
1705         u32 running, blackout = 0;
1706         u32 *io_mc_regs;
1707         int i, regs_size, ucode_size;
1708
1709         if (!rdev->mc_fw)
1710                 return -EINVAL;
1711
1712         ucode_size = rdev->mc_fw->size / 4;
1713
1714         switch (rdev->family) {
1715         case CHIP_BONAIRE:
1716                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1717                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1718                 break;
1719         case CHIP_HAWAII:
1720                 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1721                 regs_size = HAWAII_IO_MC_REGS_SIZE;
1722                 break;
1723         default:
1724                 return -EINVAL;
1725         }
1726
1727         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1728
1729         if (running == 0) {
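                /*
                 * Note: the blackout save below can never run -- this point
                 * is only reached when running == 0, so the inner block is
                 * effectively dead code.
                 */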
1730                 if (running) {
1731                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1732                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1733                 }
1734
1735                 /* reset the engine and set to writable */
1736                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1737                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1738
1739                 /* load mc io regs */
1740                 for (i = 0; i < regs_size; i++) {
1741                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1742                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1743                 }
1744                 /* load the MC ucode */
1745                 fw_data = (const __be32 *)rdev->mc_fw->data;
1746                 for (i = 0; i < ucode_size; i++)
1747                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1748
1749                 /* put the engine back into the active state */
1750                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1751                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1752                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1753
1754                 /* wait for training to complete */
1755                 for (i = 0; i < rdev->usec_timeout; i++) {
1756                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1757                                 break;
1758                         udelay(1);
1759                 }
1760                 for (i = 0; i < rdev->usec_timeout; i++) {
1761                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1762                                 break;
1763                         udelay(1);
1764                 }
1765
1766                 if (running)
1767                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1768         }
1769
1770         return 0;
1771 }
1772
1773 /**
1774  * cik_init_microcode - load ucode images from disk
1775  *
1776  * @rdev: radeon_device pointer
1777  *
1778  * Use the firmware interface to load the ucode images into
1779  * the driver (the images are not yet loaded into the hw).
1780  * Returns 0 on success, error on failure.
1781  */
1782 static int cik_init_microcode(struct radeon_device *rdev)
1783 {
1784         const char *chip_name;
1785         size_t pfp_req_size, me_req_size, ce_req_size,
1786                 mec_req_size, rlc_req_size, mc_req_size = 0,
1787                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1788         char fw_name[30];
1789         int err;
1790
1791         DRM_DEBUG("\n");
1792
1793         switch (rdev->family) {
1794         case CHIP_BONAIRE:
1795                 chip_name = "BONAIRE";
1796                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1797                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1798                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1799                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1800                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1801                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1802                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1803                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1804                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1805                 break;
1806         case CHIP_HAWAII:
1807                 chip_name = "HAWAII";
1808                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1809                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1810                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1811                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1812                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1813                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1814                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1815                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1816                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1817                 break;
1818         case CHIP_KAVERI:
1819                 chip_name = "KAVERI";
1820                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1821                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1822                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1823                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1824                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1825                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1826                 break;
1827         case CHIP_KABINI:
1828                 chip_name = "KABINI";
1829                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1830                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1831                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1832                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1833                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1834                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1835                 break;
1836         default: BUG();
1837         }
1838
1839         DRM_INFO("Loading %s Microcode\n", chip_name);
1840
1841         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1842         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1843         if (err)
1844                 goto out;
1845         if (rdev->pfp_fw->size != pfp_req_size) {
1846                 printk(KERN_ERR
1847                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1848                        rdev->pfp_fw->size, fw_name);
1849                 err = -EINVAL;
1850                 goto out;
1851         }
1852
1853         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1854         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1855         if (err)
1856                 goto out;
1857         if (rdev->me_fw->size != me_req_size) {
1858                 printk(KERN_ERR
1859                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1860                        rdev->me_fw->size, fw_name);
1861                 err = -EINVAL;
1862         }
1863
1864         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1865         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1866         if (err)
1867                 goto out;
1868         if (rdev->ce_fw->size != ce_req_size) {
1869                 printk(KERN_ERR
1870                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1871                        rdev->ce_fw->size, fw_name);
1872                 err = -EINVAL;
1873         }
1874
1875         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1876         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1877         if (err)
1878                 goto out;
1879         if (rdev->mec_fw->size != mec_req_size) {
1880                 printk(KERN_ERR
1881                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1882                        rdev->mec_fw->size, fw_name);
1883                 err = -EINVAL;
1884         }
1885
1886         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1887         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1888         if (err)
1889                 goto out;
1890         if (rdev->rlc_fw->size != rlc_req_size) {
1891                 printk(KERN_ERR
1892                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1893                        rdev->rlc_fw->size, fw_name);
1894                 err = -EINVAL;
1895         }
1896
1897         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1898         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1899         if (err)
1900                 goto out;
1901         if (rdev->sdma_fw->size != sdma_req_size) {
1902                 printk(KERN_ERR
1903                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1904                        rdev->sdma_fw->size, fw_name);
1905                 err = -EINVAL;
1906         }
1907
1908         /* No SMC, MC ucode on APUs */
1909         if (!(rdev->flags & RADEON_IS_IGP)) {
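                /* prefer the newer mc2 image; fall back to the original mc image */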
1910                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1911                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1912                 if (err) {
1913                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1914                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1915                         if (err)
1916                                 goto out;
1917                 }
1918                 if ((rdev->mc_fw->size != mc_req_size) &&
1919                     (rdev->mc_fw->size != mc2_req_size)) {
1920                         printk(KERN_ERR
1921                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1922                                rdev->mc_fw->size, fw_name);
1923                         err = -EINVAL;
1924                 }
1925                 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1926
1927                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1928                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1929                 if (err) {
1930                         printk(KERN_ERR
1931                                "smc: error loading firmware \"%s\"\n",
1932                                fw_name);
1933                         release_firmware(rdev->smc_fw);
1934                         rdev->smc_fw = NULL;
1935                         err = 0;
1936                 } else if (rdev->smc_fw->size != smc_req_size) {
1937                         printk(KERN_ERR
1938                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1939                                rdev->smc_fw->size, fw_name);
1940                         err = -EINVAL;
1941                 }
1942         }
1943
1944 out:
1945         if (err) {
1946                 if (err != -EINVAL)
1947                         printk(KERN_ERR
1948                                "cik_cp: Failed to load firmware \"%s\"\n",
1949                                fw_name);
1950                 release_firmware(rdev->pfp_fw);
1951                 rdev->pfp_fw = NULL;
1952                 release_firmware(rdev->me_fw);
1953                 rdev->me_fw = NULL;
1954                 release_firmware(rdev->ce_fw);
1955                 rdev->ce_fw = NULL;
                 release_firmware(rdev->mec_fw);
                 rdev->mec_fw = NULL;
1956                 release_firmware(rdev->rlc_fw);
1957                 rdev->rlc_fw = NULL;
                 release_firmware(rdev->sdma_fw);
                 rdev->sdma_fw = NULL;
1958                 release_firmware(rdev->mc_fw);
1959                 rdev->mc_fw = NULL;
1960                 release_firmware(rdev->smc_fw);
1961                 rdev->smc_fw = NULL;
1962         }
1963         return err;
1964 }
1965
1966 /*
1967  * Core functions
1968  */
1969 /**
1970  * cik_tiling_mode_table_init - init the hw tiling table
1971  *
1972  * @rdev: radeon_device pointer
1973  *
1974  * Starting with SI, the tiling setup is done globally in a
1975  * set of 32 tiling modes.  Rather than selecting each set of
1976  * parameters per surface as on older asics, we just select
1977  * which index in the tiling table we want to use, and the
1978  * surface uses those parameters (CIK).
1979  */
1980 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1981 {
1982         const u32 num_tile_mode_states = 32;
1983         const u32 num_secondary_tile_mode_states = 16;
1984         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1985         u32 num_pipe_configs;
1986         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1987                 rdev->config.cik.max_shader_engines;
1988
1989         switch (rdev->config.cik.mem_row_size_in_kb) {
1990         case 1:
1991                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1992                 break;
1993         case 2:
1994         default:
1995                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1996                 break;
1997         case 4:
1998                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1999                 break;
2000         }
2001
2002         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2003         if (num_pipe_configs > 8)
2004                 num_pipe_configs = 16;
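        /* anything with more than 8 pipes (e.g. Hawaii) uses the 16 pipe tables below */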
2005
2006         if (num_pipe_configs == 16) {
2007                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2008                         switch (reg_offset) {
2009                         case 0:
2010                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2011                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2012                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2013                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2014                                 break;
2015                         case 1:
2016                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2017                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2018                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2019                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2020                                 break;
2021                         case 2:
2022                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2023                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2024                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2025                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2026                                 break;
2027                         case 3:
2028                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2029                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2030                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2031                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2032                                 break;
2033                         case 4:
2034                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2035                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2036                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2037                                                  TILE_SPLIT(split_equal_to_row_size));
2038                                 break;
2039                         case 5:
2040                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2041                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2042                                 break;
2043                         case 6:
2044                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2045                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2046                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2047                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2048                                 break;
2049                         case 7:
2050                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2051                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2052                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2053                                                  TILE_SPLIT(split_equal_to_row_size));
2054                                 break;
2055                         case 8:
2056                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2057                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2058                                 break;
2059                         case 9:
2060                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2061                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2062                                 break;
2063                         case 10:
2064                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2065                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2066                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2067                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2068                                 break;
2069                         case 11:
2070                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2071                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2072                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2073                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2074                                 break;
2075                         case 12:
2076                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2077                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2078                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080                                 break;
2081                         case 13:
2082                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2083                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2084                                 break;
2085                         case 14:
2086                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2087                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2088                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2089                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090                                 break;
2091                         case 16:
2092                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2093                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2094                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2095                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2096                                 break;
2097                         case 17:
2098                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2099                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2100                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2101                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102                                 break;
2103                         case 27:
2104                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2105                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2106                                 break;
2107                         case 28:
2108                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2109                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2110                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2111                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2112                                 break;
2113                         case 29:
2114                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2115                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2116                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2117                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2118                                 break;
2119                         case 30:
2120                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2121                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2122                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2123                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2124                                 break;
2125                         default:
2126                                 gb_tile_moden = 0;
2127                                 break;
2128                         }
2129                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2130                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2131                 }
2132                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2133                         switch (reg_offset) {
2134                         case 0:
2135                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2136                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2137                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2138                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2139                                 break;
2140                         case 1:
2141                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2142                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2143                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2144                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2145                                 break;
2146                         case 2:
2147                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2148                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2149                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2150                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2151                                 break;
2152                         case 3:
2153                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2154                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2155                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2156                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2157                                 break;
2158                         case 4:
2159                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2160                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2161                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2162                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2163                                 break;
2164                         case 5:
2165                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2166                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2167                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2168                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2169                                 break;
2170                         case 6:
2171                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2172                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2173                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2174                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2175                                 break;
2176                         case 8:
2177                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2178                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2179                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2180                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2181                                 break;
2182                         case 9:
2183                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2184                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2185                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2186                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2187                                 break;
2188                         case 10:
2189                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2190                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2191                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2192                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2193                                 break;
2194                         case 11:
2195                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2196                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2197                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2198                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2199                                 break;
2200                         case 12:
2201                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2202                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2203                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2204                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2205                                 break;
2206                         case 13:
2207                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2208                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2209                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2210                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2211                                 break;
2212                         case 14:
2213                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2214                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2215                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2216                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2217                                 break;
2218                         default:
2219                                 gb_tile_moden = 0;
2220                                 break;
2221                         }
2222                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2223                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2224                 }
2225         } else if (num_pipe_configs == 8) {
2226                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2227                         switch (reg_offset) {
2228                         case 0:
2229                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2230                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2231                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2232                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2233                                 break;
2234                         case 1:
2235                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2236                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2237                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2238                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2239                                 break;
2240                         case 2:
2241                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2242                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2243                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2244                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2245                                 break;
2246                         case 3:
2247                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2248                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2249                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2250                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2251                                 break;
2252                         case 4:
2253                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2255                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2256                                                  TILE_SPLIT(split_equal_to_row_size));
2257                                 break;
2258                         case 5:
2259                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2260                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2261                                 break;
2262                         case 6:
2263                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2264                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2265                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2266                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2267                                 break;
2268                         case 7:
2269                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2270                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2271                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2272                                                  TILE_SPLIT(split_equal_to_row_size));
2273                                 break;
2274                         case 8:
2275                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2276                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2277                                 break;
2278                         case 9:
2279                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2280                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2281                                 break;
2282                         case 10:
2283                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2284                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2285                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2286                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2287                                 break;
2288                         case 11:
2289                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2290                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2291                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2292                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2293                                 break;
2294                         case 12:
2295                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2296                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2297                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2298                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2299                                 break;
2300                         case 13:
2301                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2302                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2303                                 break;
2304                         case 14:
2305                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2307                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2308                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2309                                 break;
2310                         case 16:
2311                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2312                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2313                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2314                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2315                                 break;
2316                         case 17:
2317                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2318                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2319                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2320                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321                                 break;
2322                         case 27:
2323                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2324                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2325                                 break;
2326                         case 28:
2327                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2329                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2330                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2331                                 break;
2332                         case 29:
2333                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2334                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2335                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2336                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2337                                 break;
2338                         case 30:
2339                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2340                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2341                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2342                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2343                                 break;
2344                         default:
2345                                 gb_tile_moden = 0;
2346                                 break;
2347                         }
2348                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2349                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2350                 }
2351                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2352                         switch (reg_offset) {
2353                         case 0:
2354                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2356                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2357                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2358                                 break;
2359                         case 1:
2360                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2361                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2362                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2363                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2364                                 break;
2365                         case 2:
2366                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2367                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2368                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2369                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2370                                 break;
2371                         case 3:
2372                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2374                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2375                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2376                                 break;
2377                         case 4:
2378                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2380                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2381                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2382                                 break;
2383                         case 5:
2384                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2386                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2387                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2388                                 break;
2389                         case 6:
2390                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2391                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2392                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2393                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2394                                 break;
2395                         case 8:
2396                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2398                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2399                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2400                                 break;
2401                         case 9:
2402                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2403                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2404                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2405                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2406                                 break;
2407                         case 10:
2408                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2410                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2411                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2412                                 break;
2413                         case 11:
2414                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2416                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2417                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2418                                 break;
2419                         case 12:
2420                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2422                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2423                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2424                                 break;
2425                         case 13:
2426                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2427                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2428                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2429                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2430                                 break;
2431                         case 14:
2432                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2433                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2434                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2435                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2436                                 break;
2437                         default:
2438                                 gb_tile_moden = 0;
2439                                 break;
2440                         }
2441                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2442                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2443                 }
2444         } else if (num_pipe_configs == 4) {
2445                 if (num_rbs == 4) {
2446                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2447                                 switch (reg_offset) {
2448                                 case 0:
2449                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2451                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2452                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2453                                         break;
2454                                 case 1:
2455                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2456                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2457                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2458                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2459                                         break;
2460                                 case 2:
2461                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2462                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2463                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2464                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2465                                         break;
2466                                 case 3:
2467                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2468                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2469                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2470                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2471                                         break;
2472                                 case 4:
2473                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2474                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2475                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2476                                                          TILE_SPLIT(split_equal_to_row_size));
2477                                         break;
2478                                 case 5:
2479                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2480                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2481                                         break;
2482                                 case 6:
2483                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2484                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2485                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2486                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2487                                         break;
2488                                 case 7:
2489                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2490                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2491                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2492                                                          TILE_SPLIT(split_equal_to_row_size));
2493                                         break;
2494                                 case 8:
2495                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2496                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2497                                         break;
2498                                 case 9:
2499                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2500                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2501                                         break;
2502                                 case 10:
2503                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2505                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2506                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2507                                         break;
2508                                 case 11:
2509                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2510                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2511                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2512                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2513                                         break;
2514                                 case 12:
2515                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2516                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2517                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2518                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2519                                         break;
2520                                 case 13:
2521                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2522                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2523                                         break;
2524                                 case 14:
2525                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2527                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2528                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2529                                         break;
2530                                 case 16:
2531                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2532                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2533                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2534                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2535                                         break;
2536                                 case 17:
2537                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2538                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2539                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2540                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2541                                         break;
2542                                 case 27:
2543                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2544                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2545                                         break;
2546                                 case 28:
2547                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2548                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2549                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2550                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2551                                         break;
2552                                 case 29:
2553                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2555                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2556                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                                         break;
2558                                 case 30:
2559                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2561                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2562                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563                                         break;
2564                                 default:
2565                                         gb_tile_moden = 0;
2566                                         break;
2567                                 }
2568                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2569                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2570                         }
2571                 } else if (num_rbs < 4) {
2572                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2573                                 switch (reg_offset) {
2574                                 case 0:
2575                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2577                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2578                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2579                                         break;
2580                                 case 1:
2581                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2583                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2584                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2585                                         break;
2586                                 case 2:
2587                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2588                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2589                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2590                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2591                                         break;
2592                                 case 3:
2593                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2595                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2596                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2597                                         break;
2598                                 case 4:
2599                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2600                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2601                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2602                                                          TILE_SPLIT(split_equal_to_row_size));
2603                                         break;
2604                                 case 5:
2605                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2606                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2607                                         break;
2608                                 case 6:
2609                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2610                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2611                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2612                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2613                                         break;
2614                                 case 7:
2615                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2616                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2617                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2618                                                          TILE_SPLIT(split_equal_to_row_size));
2619                                         break;
2620                                 case 8:
2621                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2622                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
2623                                         break;
2624                                 case 9:
2625                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2626                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2627                                         break;
2628                                 case 10:
2629                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2630                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2631                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2632                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2633                                         break;
2634                                 case 11:
2635                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2636                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2637                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2638                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2639                                         break;
2640                                 case 12:
2641                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2642                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2643                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2644                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645                                         break;
2646                                 case 13:
2647                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2648                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2649                                         break;
2650                                 case 14:
2651                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2653                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2654                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2655                                         break;
2656                                 case 16:
2657                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2658                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2659                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2660                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661                                         break;
2662                                 case 17:
2663                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2664                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2665                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2666                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667                                         break;
2668                                 case 27:
2669                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2670                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2671                                         break;
2672                                 case 28:
2673                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2674                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2675                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2676                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2677                                         break;
2678                                 case 29:
2679                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2680                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2681                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2682                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2683                                         break;
2684                                 case 30:
2685                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2686                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2687                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2688                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2689                                         break;
2690                                 default:
2691                                         gb_tile_moden = 0;
2692                                         break;
2693                                 }
2694                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2695                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2696                         }
2697                 }
2698                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2699                         switch (reg_offset) {
2700                         case 0:
2701                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2703                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2704                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2705                                 break;
2706                         case 1:
2707                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2709                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2710                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2711                                 break;
2712                         case 2:
2713                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2715                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2716                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2717                                 break;
2718                         case 3:
2719                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2721                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2722                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2723                                 break;
2724                         case 4:
2725                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2728                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2729                                 break;
2730                         case 5:
2731                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2734                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2735                                 break;
2736                         case 6:
2737                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2739                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2740                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2741                                 break;
2742                         case 8:
2743                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2744                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2745                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2747                                 break;
2748                         case 9:
2749                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2750                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2752                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2753                                 break;
2754                         case 10:
2755                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2757                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2758                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2759                                 break;
2760                         case 11:
2761                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2763                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2764                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2765                                 break;
2766                         case 12:
2767                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2770                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2771                                 break;
2772                         case 13:
2773                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2775                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2776                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2777                                 break;
2778                         case 14:
2779                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2782                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2783                                 break;
2784                         default:
2785                                 gb_tile_moden = 0;
2786                                 break;
2787                         }
2788                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2789                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2790                 }
2791         } else if (num_pipe_configs == 2) {
2792                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2793                         switch (reg_offset) {
2794                         case 0:
2795                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2797                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2798                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2799                                 break;
2800                         case 1:
2801                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2803                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2804                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2805                                 break;
2806                         case 2:
2807                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2809                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2810                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2811                                 break;
2812                         case 3:
2813                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2814                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2815                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2816                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2817                                 break;
2818                         case 4:
2819                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2821                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2822                                                  TILE_SPLIT(split_equal_to_row_size));
2823                                 break;
2824                         case 5:
2825                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2826                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2827                                 break;
2828                         case 6:
2829                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2830                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2831                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2832                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2833                                 break;
2834                         case 7:
2835                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2836                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2837                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2838                                                  TILE_SPLIT(split_equal_to_row_size));
2839                                 break;
2840                         case 8:
2841                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2842                                 break;
2843                         case 9:
2844                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2845                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2846                                 break;
2847                         case 10:
2848                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2849                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2850                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2851                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2852                                 break;
2853                         case 11:
2854                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2855                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2856                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2857                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2858                                 break;
2859                         case 12:
2860                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2861                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2862                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2863                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2864                                 break;
2865                         case 13:
2866                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2867                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2868                                 break;
2869                         case 14:
2870                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2871                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2872                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2873                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2874                                 break;
2875                         case 16:
2876                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2877                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2878                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2879                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2880                                 break;
2881                         case 17:
2882                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2883                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2884                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2885                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2886                                 break;
2887                         case 27:
2888                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2889                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2890                                 break;
2891                         case 28:
2892                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2893                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2894                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2895                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2896                                 break;
2897                         case 29:
2898                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2899                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2900                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2901                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2902                                 break;
2903                         case 30:
2904                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2907                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908                                 break;
2909                         default:
2910                                 gb_tile_moden = 0;
2911                                 break;
2912                         }
2913                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2914                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2915                 }
2916                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2917                         switch (reg_offset) {
2918                         case 0:
2919                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2920                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2921                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2922                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2923                                 break;
2924                         case 1:
2925                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2926                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2927                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2928                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2929                                 break;
2930                         case 2:
2931                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2932                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2933                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2934                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2935                                 break;
2936                         case 3:
2937                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2938                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2939                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2940                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2941                                 break;
2942                         case 4:
2943                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2944                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2945                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2946                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2947                                 break;
2948                         case 5:
2949                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2950                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2951                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2952                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2953                                 break;
2954                         case 6:
2955                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2957                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2958                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2959                                 break;
2960                         case 8:
2961                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2962                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2963                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2964                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2965                                 break;
2966                         case 9:
2967                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2968                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2969                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2971                                 break;
2972                         case 10:
2973                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2974                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2975                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2976                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2977                                 break;
2978                         case 11:
2979                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2980                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2981                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2983                                 break;
2984                         case 12:
2985                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2986                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2987                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2988                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2989                                 break;
2990                         case 13:
2991                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2993                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2995                                 break;
2996                         case 14:
2997                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2999                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3000                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3001                                 break;
3002                         default:
3003                                 gb_tile_moden = 0;
3004                                 break;
3005                         }
3006                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3007                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3008                 }
3009         } else
3010                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3011 }
3012
3013 /**
3014  * cik_select_se_sh - select which SE, SH to address
3015  *
3016  * @rdev: radeon_device pointer
3017  * @se_num: shader engine to address
3018  * @sh_num: sh block to address
3019  *
3020  * Select which SE, SH combinations to address. Certain
3021  * registers are instanced per SE or SH.  0xffffffff means
3022  * broadcast to all SEs or SHs (CIK).
3023  */
3024 static void cik_select_se_sh(struct radeon_device *rdev,
3025                              u32 se_num, u32 sh_num)
3026 {
3027         u32 data = INSTANCE_BROADCAST_WRITES;
3028
3029         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3030                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3031         else if (se_num == 0xffffffff)
3032                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3033         else if (sh_num == 0xffffffff)
3034                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3035         else
3036                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3037         WREG32(GRBM_GFX_INDEX, data);
3038 }
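
/*
 * Illustrative usage (a sketch only, not driver code): per-SE/SH instanced
 * registers are programmed by selecting one instance, writing the register,
 * then restoring broadcast mode, e.g.
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	WREG32(SOME_PER_SH_REG, value);		(SOME_PER_SH_REG is hypothetical)
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *
 * cik_setup_rb() below follows this pattern around CC_RB_BACKEND_DISABLE
 * and PA_SC_RASTER_CONFIG.
 */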
3039
3040 /**
3041  * cik_create_bitmask - create a bitmask
3042  *
3043  * @bit_width: length of the mask
3044  *
3045  * Create a variable length bit mask (CIK).
3046  * Returns the bitmask.
3047  */
3048 static u32 cik_create_bitmask(u32 bit_width)
3049 {
3050         u32 i, mask = 0;
3051
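        /*
         * Build a mask with the low bit_width bits set; the loop is
         * equivalent to (1 << bit_width) - 1 for bit_width < 32
         * (e.g. bit_width == 4 yields 0xf).
         */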
3052         for (i = 0; i < bit_width; i++) {
3053                 mask <<= 1;
3054                 mask |= 1;
3055         }
3056         return mask;
3057 }
3058
3059 /**
3060  * cik_get_rb_disabled - computes the mask of disabled RBs
3061  *
3062  * @rdev: radeon_device pointer
3063  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3064  * @sh_per_se: number of SH blocks per SE for the asic
3066  *
3067  * Calculates the bitmask of disabled RBs (CIK).
3068  * Returns the disabled RB bitmask.
3069  */
3070 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3071                               u32 max_rb_num_per_se,
3072                               u32 sh_per_se)
3073 {
3074         u32 data, mask;
3075
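        /*
         * CC_RB_BACKEND_DISABLE carries the hardware-disabled (harvested) RB
         * bits; the field is only honoured when bit 0 of the register is set.
         * Any RBs additionally disabled by the driver are OR'ed in from
         * GC_USER_RB_BACKEND_DISABLE before masking down to this SH's RBs.
         */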
3076         data = RREG32(CC_RB_BACKEND_DISABLE);
3077         if (data & 1)
3078                 data &= BACKEND_DISABLE_MASK;
3079         else
3080                 data = 0;
3081         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3082
3083         data >>= BACKEND_DISABLE_SHIFT;
3084
3085         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3086
3087         return data & mask;
3088 }
3089
3090 /**
3091  * cik_setup_rb - setup the RBs on the asic
3092  *
3093  * @rdev: radeon_device pointer
3094  * @se_num: number of SEs (shader engines) for the asic
3095  * @sh_per_se: number of SH blocks per SE for the asic
3096  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3097  *
3098  * Configures per-SE/SH RB registers (CIK).
3099  */
3100 static void cik_setup_rb(struct radeon_device *rdev,
3101                          u32 se_num, u32 sh_per_se,
3102                          u32 max_rb_num_per_se)
3103 {
3104         int i, j;
3105         u32 data, mask;
3106         u32 disabled_rbs = 0;
3107         u32 enabled_rbs = 0;
3108
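        /*
         * Collect each SE/SH's disabled-RB mask into a single packed bitmap;
         * Hawaii uses a different per-SH bitmap width than the other CIK parts.
         */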
3109         for (i = 0; i < se_num; i++) {
3110                 for (j = 0; j < sh_per_se; j++) {
3111                         cik_select_se_sh(rdev, i, j);
3112                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3113                         if (rdev->family == CHIP_HAWAII)
3114                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3115                         else
3116                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3117                 }
3118         }
3119         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3120
3121         mask = 1;
3122         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3123                 if (!(disabled_rbs & mask))
3124                         enabled_rbs |= mask;
3125                 mask <<= 1;
3126         }
3127
3128         rdev->config.cik.backend_enable_mask = enabled_rbs;
3129
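        /*
         * Program PA_SC_RASTER_CONFIG per SE: for every SH, look at its two
         * RB enable bits and pick an RB mapping (both RBs, only RB0, or only
         * RB1 enabled), consuming enabled_rbs two bits at a time.
         */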
3130         for (i = 0; i < se_num; i++) {
3131                 cik_select_se_sh(rdev, i, 0xffffffff);
3132                 data = 0;
3133                 for (j = 0; j < sh_per_se; j++) {
3134                         switch (enabled_rbs & 3) {
3135                         case 0:
3136                                 if (j == 0)
3137                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3138                                 else
3139                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3140                                 break;
3141                         case 1:
3142                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3143                                 break;
3144                         case 2:
3145                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3146                                 break;
3147                         case 3:
3148                         default:
3149                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3150                                 break;
3151                         }
3152                         enabled_rbs >>= 2;
3153                 }
3154                 WREG32(PA_SC_RASTER_CONFIG, data);
3155         }
3156         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3157 }
3158
3159 /**
3160  * cik_gpu_init - setup the 3D engine
3161  *
3162  * @rdev: radeon_device pointer
3163  *
3164  * Configures the 3D engine and tiling configuration
3165  * registers so that the 3D engine is usable.
3166  */
3167 static void cik_gpu_init(struct radeon_device *rdev)
3168 {
3169         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3170         u32 mc_shared_chmap, mc_arb_ramcfg;
3171         u32 hdp_host_path_cntl;
3172         u32 tmp;
3173         int i, j;
3174
3175         switch (rdev->family) {
3176         case CHIP_BONAIRE:
3177                 rdev->config.cik.max_shader_engines = 2;
3178                 rdev->config.cik.max_tile_pipes = 4;
3179                 rdev->config.cik.max_cu_per_sh = 7;
3180                 rdev->config.cik.max_sh_per_se = 1;
3181                 rdev->config.cik.max_backends_per_se = 2;
3182                 rdev->config.cik.max_texture_channel_caches = 4;
3183                 rdev->config.cik.max_gprs = 256;
3184                 rdev->config.cik.max_gs_threads = 32;
3185                 rdev->config.cik.max_hw_contexts = 8;
3186
3187                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3188                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3189                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3190                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3191                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3192                 break;
3193         case CHIP_HAWAII:
3194                 rdev->config.cik.max_shader_engines = 4;
3195                 rdev->config.cik.max_tile_pipes = 16;
3196                 rdev->config.cik.max_cu_per_sh = 11;
3197                 rdev->config.cik.max_sh_per_se = 1;
3198                 rdev->config.cik.max_backends_per_se = 4;
3199                 rdev->config.cik.max_texture_channel_caches = 16;
3200                 rdev->config.cik.max_gprs = 256;
3201                 rdev->config.cik.max_gs_threads = 32;
3202                 rdev->config.cik.max_hw_contexts = 8;
3203
3204                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3205                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3206                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3207                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3208                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3209                 break;
3210         case CHIP_KAVERI:
3211                 rdev->config.cik.max_shader_engines = 1;
3212                 rdev->config.cik.max_tile_pipes = 4;
3213                 if ((rdev->pdev->device == 0x1304) ||
3214                     (rdev->pdev->device == 0x1305) ||
3215                     (rdev->pdev->device == 0x130C) ||
3216                     (rdev->pdev->device == 0x130F) ||
3217                     (rdev->pdev->device == 0x1310) ||
3218                     (rdev->pdev->device == 0x1311) ||
3219                     (rdev->pdev->device == 0x131C)) {
3220                         rdev->config.cik.max_cu_per_sh = 8;
3221                         rdev->config.cik.max_backends_per_se = 2;
3222                 } else if ((rdev->pdev->device == 0x1309) ||
3223                            (rdev->pdev->device == 0x130A) ||
3224                            (rdev->pdev->device == 0x130D) ||
3225                            (rdev->pdev->device == 0x1313) ||
3226                            (rdev->pdev->device == 0x131D)) {
3227                         rdev->config.cik.max_cu_per_sh = 6;
3228                         rdev->config.cik.max_backends_per_se = 2;
3229                 } else if ((rdev->pdev->device == 0x1306) ||
3230                            (rdev->pdev->device == 0x1307) ||
3231                            (rdev->pdev->device == 0x130B) ||
3232                            (rdev->pdev->device == 0x130E) ||
3233                            (rdev->pdev->device == 0x1315) ||
3234                            (rdev->pdev->device == 0x1318) ||
3235                            (rdev->pdev->device == 0x131B)) {
3236                         rdev->config.cik.max_cu_per_sh = 4;
3237                         rdev->config.cik.max_backends_per_se = 1;
3238                 } else {
3239                         rdev->config.cik.max_cu_per_sh = 3;
3240                         rdev->config.cik.max_backends_per_se = 1;
3241                 }
3242                 rdev->config.cik.max_sh_per_se = 1;
3243                 rdev->config.cik.max_texture_channel_caches = 4;
3244                 rdev->config.cik.max_gprs = 256;
3245                 rdev->config.cik.max_gs_threads = 16;
3246                 rdev->config.cik.max_hw_contexts = 8;
3247
3248                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3249                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3250                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3251                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3252                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3253                 break;
3254         case CHIP_KABINI:
3255         default:
3256                 rdev->config.cik.max_shader_engines = 1;
3257                 rdev->config.cik.max_tile_pipes = 2;
3258                 rdev->config.cik.max_cu_per_sh = 2;
3259                 rdev->config.cik.max_sh_per_se = 1;
3260                 rdev->config.cik.max_backends_per_se = 1;
3261                 rdev->config.cik.max_texture_channel_caches = 2;
3262                 rdev->config.cik.max_gprs = 256;
3263                 rdev->config.cik.max_gs_threads = 16;
3264                 rdev->config.cik.max_hw_contexts = 8;
3265
3266                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3267                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3268                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3269                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3270                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3271                 break;
3272         }
3273
3274         /* Initialize HDP */
3275         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3276                 WREG32((0x2c14 + j), 0x00000000);
3277                 WREG32((0x2c18 + j), 0x00000000);
3278                 WREG32((0x2c1c + j), 0x00000000);
3279                 WREG32((0x2c20 + j), 0x00000000);
3280                 WREG32((0x2c24 + j), 0x00000000);
3281         }
3282
3283         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3284
3285         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3286
3287         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3288         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3289
3290         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3291         rdev->config.cik.mem_max_burst_length_bytes = 256;
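        /*
         * Derive the DRAM row size in KB from the NOOFCOLS field:
         * 4 * 2^(8 + noofcols) bytes, clamped to at most 4 KB just below.
         */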
3292         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3293         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3294         if (rdev->config.cik.mem_row_size_in_kb > 4)
3295                 rdev->config.cik.mem_row_size_in_kb = 4;
3296         /* XXX use MC settings? */
3297         rdev->config.cik.shader_engine_tile_size = 32;
3298         rdev->config.cik.num_gpus = 1;
3299         rdev->config.cik.multi_gpu_tile_size = 64;
3300
3301         /* fix up row size */
3302         gb_addr_config &= ~ROW_SIZE_MASK;
3303         switch (rdev->config.cik.mem_row_size_in_kb) {
3304         case 1:
3305         default:
3306                 gb_addr_config |= ROW_SIZE(0);
3307                 break;
3308         case 2:
3309                 gb_addr_config |= ROW_SIZE(1);
3310                 break;
3311         case 4:
3312                 gb_addr_config |= ROW_SIZE(2);
3313                 break;
3314         }
3315
3316         /* set up tiling info dword.  gb_addr_config is not adequate since it does
3317          * not have bank info, so create a custom tiling dword.
3318          * bits 3:0   num_pipes
3319          * bits 7:4   num_banks
3320          * bits 11:8  group_size
3321          * bits 15:12 row_size
3322          */
3323         rdev->config.cik.tile_config = 0;
3324         switch (rdev->config.cik.num_tile_pipes) {
3325         case 1:
3326                 rdev->config.cik.tile_config |= (0 << 0);
3327                 break;
3328         case 2:
3329                 rdev->config.cik.tile_config |= (1 << 0);
3330                 break;
3331         case 4:
3332                 rdev->config.cik.tile_config |= (2 << 0);
3333                 break;
3334         case 8:
3335         default:
3336                 /* XXX what about 12? */
3337                 rdev->config.cik.tile_config |= (3 << 0);
3338                 break;
3339         }
3340         rdev->config.cik.tile_config |=
3341                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3342         rdev->config.cik.tile_config |=
3343                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3344         rdev->config.cik.tile_config |=
3345                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3346
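        /*
         * Illustrative decode of the dword assembled above (a sketch of how a
         * consumer might unpack it, not code used here):
         *
         *	pipes_log2 = tile_config & 0xf;          0=1, 1=2, 2=4, 3=8 pipes
         *	num_banks  = (tile_config >> 4) & 0xf;   raw NOOFBANK field
         *	group_size = (tile_config >> 8) & 0xf;   raw PIPE_INTERLEAVE_SIZE field
         *	row_size   = (tile_config >> 12) & 0xf;  raw ROW_SIZE field
         */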
3347         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3348         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3349         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3350         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3351         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3352         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3353         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3354         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3355
3356         cik_tiling_mode_table_init(rdev);
3357
3358         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3359                      rdev->config.cik.max_sh_per_se,
3360                      rdev->config.cik.max_backends_per_se);
3361
3362         /* set HW defaults for 3D engine */
3363         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3364
3365         WREG32(SX_DEBUG_1, 0x20);
3366
3367         WREG32(TA_CNTL_AUX, 0x00010000);
3368
3369         tmp = RREG32(SPI_CONFIG_CNTL);
3370         tmp |= 0x03000000;
3371         WREG32(SPI_CONFIG_CNTL, tmp);
3372
3373         WREG32(SQ_CONFIG, 1);
3374
3375         WREG32(DB_DEBUG, 0);
3376
3377         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3378         tmp |= 0x00000400;
3379         WREG32(DB_DEBUG2, tmp);
3380
3381         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3382         tmp |= 0x00020200;
3383         WREG32(DB_DEBUG3, tmp);
3384
3385         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3386         tmp |= 0x00018208;
3387         WREG32(CB_HW_CONTROL, tmp);
3388
3389         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3390
3391         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3392                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3393                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3394                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3395
3396         WREG32(VGT_NUM_INSTANCES, 1);
3397
3398         WREG32(CP_PERFMON_CNTL, 0);
3399
3400         WREG32(SQ_CONFIG, 0);
3401
3402         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3403                                           FORCE_EOV_MAX_REZ_CNT(255)));
3404
3405         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3406                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3407
3408         WREG32(VGT_GS_VERTEX_REUSE, 16);
3409         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3410
3411         tmp = RREG32(HDP_MISC_CNTL);
3412         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3413         WREG32(HDP_MISC_CNTL, tmp);
3414
3415         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3416         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3417
3418         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3419         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3420
3421         udelay(50);
3422 }
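
/*
 * Illustrative sketch, not part of the driver: decoding the tile_config
 * dword packed in cik_gpu_init() above.  The field positions (bits 3:0
 * num_pipes, 7:4 num_banks, 11:8 group_size, 15:12 row_size) come from the
 * comment in that function; the num_pipes and row_size decodes mirror its
 * switch statements.  The bank and group-size fields are left raw since
 * their hardware encoding is not spelled out here.
 */
struct cik_tile_config_fields {
	unsigned num_pipes;	/* decoded: 1, 2, 4 or 8 pipes */
	unsigned banks_field;	/* raw NOOFBANK field from mc_arb_ramcfg */
	unsigned group_field;	/* raw pipe-interleave field from gb_addr_config */
	unsigned row_size_kb;	/* decoded: 1, 2 or 4 KB */
};

static inline struct cik_tile_config_fields
cik_decode_tile_config(u32 tile_config)
{
	struct cik_tile_config_fields f;

	f.num_pipes   = 1u << (tile_config & 0xf);		/* bits 3:0 */
	f.banks_field = (tile_config >> 4) & 0xf;		/* bits 7:4 */
	f.group_field = (tile_config >> 8) & 0xf;		/* bits 11:8 */
	f.row_size_kb = 1u << ((tile_config >> 12) & 0xf);	/* bits 15:12 */
	return f;
}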
3423
3424 /*
3425  * GPU scratch registers helpers function.
3426  */
3427 /**
3428  * cik_scratch_init - setup driver info for CP scratch regs
3429  *
3430  * @rdev: radeon_device pointer
3431  *
3432  * Set up the number and offset of the CP scratch registers.
3433          * NOTE: use of CP scratch registers is a legacy interface and
3434  * is not used by default on newer asics (r6xx+).  On newer asics,
3435  * memory buffers are used for fences rather than scratch regs.
3436  */
3437 static void cik_scratch_init(struct radeon_device *rdev)
3438 {
3439         int i;
3440
3441         rdev->scratch.num_reg = 7;
3442         rdev->scratch.reg_base = SCRATCH_REG0;
3443         for (i = 0; i < rdev->scratch.num_reg; i++) {
3444                 rdev->scratch.free[i] = true;
3445                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3446         }
3447 }
3448
3449 /**
3450  * cik_ring_test - basic gfx ring test
3451  *
3452  * @rdev: radeon_device pointer
3453  * @ring: radeon_ring structure holding ring information
3454  *
3455  * Allocate a scratch register and write to it using the gfx ring (CIK).
3456  * Provides a basic gfx ring test to verify that the ring is working.
3457  * Used by cik_cp_gfx_resume().
3458  * Returns 0 on success, error on failure.
3459  */
3460 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3461 {
3462         uint32_t scratch;
3463         uint32_t tmp = 0;
3464         unsigned i;
3465         int r;
3466
3467         r = radeon_scratch_get(rdev, &scratch);
3468         if (r) {
3469                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3470                 return r;
3471         }
3472         WREG32(scratch, 0xCAFEDEAD);
3473         r = radeon_ring_lock(rdev, ring, 3);
3474         if (r) {
3475                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3476                 radeon_scratch_free(rdev, scratch);
3477                 return r;
3478         }
3479         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3480         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3481         radeon_ring_write(ring, 0xDEADBEEF);
3482         radeon_ring_unlock_commit(rdev, ring);
3483
3484         for (i = 0; i < rdev->usec_timeout; i++) {
3485                 tmp = RREG32(scratch);
3486                 if (tmp == 0xDEADBEEF)
3487                         break;
3488                 DRM_UDELAY(1);
3489         }
3490         if (i < rdev->usec_timeout) {
3491                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3492         } else {
3493                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3494                           ring->idx, scratch, tmp);
3495                 r = -EINVAL;
3496         }
3497         radeon_scratch_free(rdev, scratch);
3498         return r;
3499 }
3500
3501 /**
3502  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3503  *
3504  * @rdev: radeon_device pointer
3505  * @ridx: radeon ring index
3506  *
3507  * Emits an hdp flush on the cp.
3508  */
3509 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3510                                        int ridx)
3511 {
3512         struct radeon_ring *ring = &rdev->ring[ridx];
3513         u32 ref_and_mask;
3514
3515         switch (ring->idx) {
3516         case CAYMAN_RING_TYPE_CP1_INDEX:
3517         case CAYMAN_RING_TYPE_CP2_INDEX:
3518         default:
3519                 switch (ring->me) {
3520                 case 0:
3521                         ref_and_mask = CP2 << ring->pipe;
3522                         break;
3523                 case 1:
3524                         ref_and_mask = CP6 << ring->pipe;
3525                         break;
3526                 default:
3527                         return;
3528                 }
3529                 break;
3530         case RADEON_RING_TYPE_GFX_INDEX:
3531                 ref_and_mask = CP0;
3532                 break;
3533         }
3534
3535         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3536         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3537                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3538                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3539         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3540         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3541         radeon_ring_write(ring, ref_and_mask);
3542         radeon_ring_write(ring, ref_and_mask);
3543         radeon_ring_write(ring, 0x20); /* poll interval */
3544 }
3545
3546 /**
3547  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3548  *
3549  * @rdev: radeon_device pointer
3550  * @fence: radeon fence object
3551  *
3552  * Emits a fence sequence number on the gfx ring and flushes
3553  * GPU caches.
3554  */
3555 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3556                              struct radeon_fence *fence)
3557 {
3558         struct radeon_ring *ring = &rdev->ring[fence->ring];
3559         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3560
3561         /* EVENT_WRITE_EOP - flush caches, send int */
3562         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3563         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3564                                  EOP_TC_ACTION_EN |
3565                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3566                                  EVENT_INDEX(5)));
3567         radeon_ring_write(ring, addr & 0xfffffffc);
3568         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3569         radeon_ring_write(ring, fence->seq);
3570         radeon_ring_write(ring, 0);
3571         /* HDP flush */
3572         cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3573 }
3574
3575 /**
3576  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3577  *
3578  * @rdev: radeon_device pointer
3579  * @fence: radeon fence object
3580  *
3581  * Emits a fence sequence number on the compute ring and flushes
3582  * GPU caches.
3583  */
3584 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3585                                  struct radeon_fence *fence)
3586 {
3587         struct radeon_ring *ring = &rdev->ring[fence->ring];
3588         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3589
3590         /* RELEASE_MEM - flush caches, send int */
3591         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3592         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3593                                  EOP_TC_ACTION_EN |
3594                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3595                                  EVENT_INDEX(5)));
3596         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3597         radeon_ring_write(ring, addr & 0xfffffffc);
3598         radeon_ring_write(ring, upper_32_bits(addr));
3599         radeon_ring_write(ring, fence->seq);
3600         radeon_ring_write(ring, 0);
3601         /* HDP flush */
3602         cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3603 }
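
/*
 * Note: in the PM4 packets emitted above, the count passed to
 * PACKET3(op, count) is the number of payload dwords following the header
 * minus one: EVENT_WRITE_EOP writes 5 payload dwords with count 4,
 * RELEASE_MEM writes 6 with count 5, and the 2-dword SET_UCONFIG_REG write
 * in cik_ring_test() uses count 1.
 */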
3604
3605 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3606                              struct radeon_ring *ring,
3607                              struct radeon_semaphore *semaphore,
3608                              bool emit_wait)
3609 {
3610         uint64_t addr = semaphore->gpu_addr;
3611         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3612
3613         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3614         radeon_ring_write(ring, addr & 0xffffffff);
3615         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3616
3617         return true;
3618 }
3619
3620 /**
3621  * cik_copy_cpdma - copy pages using the CP DMA engine
3622  *
3623  * @rdev: radeon_device pointer
3624  * @src_offset: src GPU address
3625  * @dst_offset: dst GPU address
3626  * @num_gpu_pages: number of GPU pages to xfer
3627  * @fence: radeon fence object
3628  *
3629  * Copy GPU pages using the CP DMA engine (CIK+).
3630  * Used by the radeon ttm implementation to move pages if
3631  * registered as the asic copy callback.
3632  */
3633 int cik_copy_cpdma(struct radeon_device *rdev,
3634                    uint64_t src_offset, uint64_t dst_offset,
3635                    unsigned num_gpu_pages,
3636                    struct radeon_fence **fence)
3637 {
3638         struct radeon_semaphore *sem = NULL;
3639         int ring_index = rdev->asic->copy.blit_ring_index;
3640         struct radeon_ring *ring = &rdev->ring[ring_index];
3641         u32 size_in_bytes, cur_size_in_bytes, control;
3642         int i, num_loops;
3643         int r = 0;
3644
3645         r = radeon_semaphore_create(rdev, &sem);
3646         if (r) {
3647                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3648                 return r;
3649         }
3650
3651         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3652         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3653         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3654         if (r) {
3655                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3656                 radeon_semaphore_free(rdev, &sem, NULL);
3657                 return r;
3658         }
3659
3660         radeon_semaphore_sync_to(sem, *fence);
3661         radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3662
3663         for (i = 0; i < num_loops; i++) {
3664                 cur_size_in_bytes = size_in_bytes;
3665                 if (cur_size_in_bytes > 0x1fffff)
3666                         cur_size_in_bytes = 0x1fffff;
3667                 size_in_bytes -= cur_size_in_bytes;
3668                 control = 0;
3669                 if (size_in_bytes == 0)
3670                         control |= PACKET3_DMA_DATA_CP_SYNC;
3671                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3672                 radeon_ring_write(ring, control);
3673                 radeon_ring_write(ring, lower_32_bits(src_offset));
3674                 radeon_ring_write(ring, upper_32_bits(src_offset));
3675                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3676                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3677                 radeon_ring_write(ring, cur_size_in_bytes);
3678                 src_offset += cur_size_in_bytes;
3679                 dst_offset += cur_size_in_bytes;
3680         }
3681
3682         r = radeon_fence_emit(rdev, fence, ring->idx);
3683         if (r) {
3684                 radeon_ring_unlock_undo(rdev, ring);
3685                 return r;
3686         }
3687
3688         radeon_ring_unlock_commit(rdev, ring);
3689         radeon_semaphore_free(rdev, &sem, *fence);
3690
3691         return r;
3692 }
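
/*
 * Illustrative sketch, not part of the driver: the ring-space budget used
 * by cik_copy_cpdma() above.  Each DMA_DATA packet moves at most 0x1fffff
 * bytes and occupies 7 dwords, plus a fixed 18-dword allowance for the
 * semaphore sync and fence packets.  A 4 KiB GPU page size
 * (RADEON_GPU_PAGE_SHIFT == 12) is assumed here.
 */
static inline unsigned cik_cpdma_ring_dwords(unsigned num_gpu_pages)
{
	unsigned size_in_bytes = num_gpu_pages << 12;	/* assumed 4 KiB pages */
	unsigned num_loops = (size_in_bytes + 0x1fffff - 1) / 0x1fffff;

	return num_loops * 7 + 18;	/* matches the radeon_ring_lock() call above */
}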
3693
3694 /*
3695  * IB stuff
3696  */
3697 /**
3698  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3699  *
3700  * @rdev: radeon_device pointer
3701  * @ib: radeon indirect buffer object
3702  *
3703  * Emits a DE (drawing engine) or CE (constant engine) IB
3704  * on the gfx ring.  IBs are usually generated by userspace
3705  * acceleration drivers and submitted to the kernel for
3706  * scheduling on the ring.  This function schedules the IB
3707  * on the gfx ring for execution by the GPU.
3708  */
3709 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3710 {
3711         struct radeon_ring *ring = &rdev->ring[ib->ring];
3712         u32 header, control = INDIRECT_BUFFER_VALID;
3713
3714         if (ib->is_const_ib) {
3715                 /* set switch buffer packet before const IB */
3716                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3717                 radeon_ring_write(ring, 0);
3718
3719                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3720         } else {
3721                 u32 next_rptr;
3722                 if (ring->rptr_save_reg) {
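			/* 3 dwords for the SET_UCONFIG_REG write below plus
			 * 4 dwords for the INDIRECT_BUFFER packet itself
			 */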
3723                         next_rptr = ring->wptr + 3 + 4;
3724                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3725                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3726                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3727                         radeon_ring_write(ring, next_rptr);
3728                 } else if (rdev->wb.enabled) {
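			/* 5 dwords for the WRITE_DATA packet below plus
			 * 4 dwords for the INDIRECT_BUFFER packet itself
			 */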
3729                         next_rptr = ring->wptr + 5 + 4;
3730                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3731                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3732                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3733                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3734                         radeon_ring_write(ring, next_rptr);
3735                 }
3736
3737                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3738         }
3739
3740         control |= ib->length_dw |
3741                 (ib->vm ? (ib->vm->id << 24) : 0);
3742
3743         radeon_ring_write(ring, header);
3744         radeon_ring_write(ring,
3745 #ifdef __BIG_ENDIAN
3746                           (2 << 0) |
3747 #endif
3748                           (ib->gpu_addr & 0xFFFFFFFC));
3749         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3750         radeon_ring_write(ring, control);
3751 }
3752
3753 /**
3754  * cik_ib_test - basic gfx ring IB test
3755  *
3756  * @rdev: radeon_device pointer
3757  * @ring: radeon_ring structure holding ring information
3758  *
3759  * Allocate an IB and execute it on the gfx ring (CIK).
3760  * Provides a basic gfx ring test to verify that IBs are working.
3761  * Returns 0 on success, error on failure.
3762  */
3763 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3764 {
3765         struct radeon_ib ib;
3766         uint32_t scratch;
3767         uint32_t tmp = 0;
3768         unsigned i;
3769         int r;
3770
3771         r = radeon_scratch_get(rdev, &scratch);
3772         if (r) {
3773                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3774                 return r;
3775         }
3776         WREG32(scratch, 0xCAFEDEAD);
3777         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3778         if (r) {
3779                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3780                 radeon_scratch_free(rdev, scratch);
3781                 return r;
3782         }
3783         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3784         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3785         ib.ptr[2] = 0xDEADBEEF;
3786         ib.length_dw = 3;
3787         r = radeon_ib_schedule(rdev, &ib, NULL);
3788         if (r) {
3789                 radeon_scratch_free(rdev, scratch);
3790                 radeon_ib_free(rdev, &ib);
3791                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3792                 return r;
3793         }
3794         r = radeon_fence_wait(ib.fence, false);
3795         if (r) {
3796                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3797                 radeon_scratch_free(rdev, scratch);
3798                 radeon_ib_free(rdev, &ib);
3799                 return r;
3800         }
3801         for (i = 0; i < rdev->usec_timeout; i++) {
3802                 tmp = RREG32(scratch);
3803                 if (tmp == 0xDEADBEEF)
3804                         break;
3805                 DRM_UDELAY(1);
3806         }
3807         if (i < rdev->usec_timeout) {
3808                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3809         } else {
3810                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3811                           scratch, tmp);
3812                 r = -EINVAL;
3813         }
3814         radeon_scratch_free(rdev, scratch);
3815         radeon_ib_free(rdev, &ib);
3816         return r;
3817 }
3818
3819 /*
3820  * CP.
3821  * On CIK, gfx and compute now have independent command processors.
3822  *
3823  * GFX
3824  * Gfx consists of a single ring and can process both gfx jobs and
3825  * compute jobs.  The gfx CP consists of three microengines (ME):
3826  * PFP - Pre-Fetch Parser
3827  * ME - Micro Engine
3828  * CE - Constant Engine
3829  * The PFP and ME make up what is considered the Drawing Engine (DE).
3830  * The CE is an asynchronous engine used for updating buffer descriptors
3831  * used by the DE so that they can be loaded into cache in parallel
3832  * while the DE is processing state update packets.
3833  *
3834  * Compute
3835  * The compute CP consists of two microengines (ME):
3836  * MEC1 - Compute MicroEngine 1
3837  * MEC2 - Compute MicroEngine 2
3838  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3839  * The queues are exposed to userspace and are programmed directly
3840  * by the compute runtime.
3841  */
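
/*
 * Illustrative sketch, not part of the driver: how the compute pipe
 * initialization loop in cik_cp_compute_resume() below maps a linear pipe
 * index onto an (ME, pipe) pair.  With at most 2 MECs and 4 pipes per MEC,
 * indices 0-3 land on MEC1 and indices 4-7 on MEC2.
 */
static inline void cik_pipe_index_to_me_pipe(int i, int *me, int *pipe)
{
	*me = (i < 4) ? 1 : 2;		/* MEC1 serves pipes 0-3, MEC2 pipes 4-7 */
	*pipe = (i < 4) ? i : (i - 4);	/* pipe number within the selected MEC */
}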
3842 /**
3843  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3844  *
3845  * @rdev: radeon_device pointer
3846  * @enable: enable or disable the MEs
3847  *
3848  * Halts or unhalts the gfx MEs.
3849  */
3850 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3851 {
3852         if (enable)
3853                 WREG32(CP_ME_CNTL, 0);
3854         else {
3855                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3856                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3857                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3858                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3859         }
3860         udelay(50);
3861 }
3862
3863 /**
3864  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3865  *
3866  * @rdev: radeon_device pointer
3867  *
3868  * Loads the gfx PFP, ME, and CE ucode.
3869  * Returns 0 for success, -EINVAL if the ucode is not available.
3870  */
3871 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3872 {
3873         const __be32 *fw_data;
3874         int i;
3875
3876         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3877                 return -EINVAL;
3878
3879         cik_cp_gfx_enable(rdev, false);
3880
3881         /* PFP */
3882         fw_data = (const __be32 *)rdev->pfp_fw->data;
3883         WREG32(CP_PFP_UCODE_ADDR, 0);
3884         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3885                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3886         WREG32(CP_PFP_UCODE_ADDR, 0);
3887
3888         /* CE */
3889         fw_data = (const __be32 *)rdev->ce_fw->data;
3890         WREG32(CP_CE_UCODE_ADDR, 0);
3891         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3892                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3893         WREG32(CP_CE_UCODE_ADDR, 0);
3894
3895         /* ME */
3896         fw_data = (const __be32 *)rdev->me_fw->data;
3897         WREG32(CP_ME_RAM_WADDR, 0);
3898         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3899                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3900         WREG32(CP_ME_RAM_WADDR, 0);
3901
3902         WREG32(CP_PFP_UCODE_ADDR, 0);
3903         WREG32(CP_CE_UCODE_ADDR, 0);
3904         WREG32(CP_ME_RAM_WADDR, 0);
3905         WREG32(CP_ME_RAM_RADDR, 0);
3906         return 0;
3907 }
3908
3909 /**
3910  * cik_cp_gfx_start - start the gfx ring
3911  *
3912  * @rdev: radeon_device pointer
3913  *
3914  * Enables the ring and loads the clear state context and other
3915  * packets required to init the ring.
3916  * Returns 0 for success, error for failure.
3917  */
3918 static int cik_cp_gfx_start(struct radeon_device *rdev)
3919 {
3920         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3921         int r, i;
3922
3923         /* init the CP */
3924         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3925         WREG32(CP_ENDIAN_SWAP, 0);
3926         WREG32(CP_DEVICE_ID, 1);
3927
3928         cik_cp_gfx_enable(rdev, true);
3929
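	/*
	 * Note: the "+ 17" below is the fixed packet overhead emitted around
	 * the clear-state buffer: SET_BASE (4) + PREAMBLE_CNTL (2) +
	 * CONTEXT_CONTROL (3) + PREAMBLE_CNTL (2) + CLEAR_STATE (2) +
	 * SET_CONTEXT_REG (4) = 17 dwords.
	 */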
3930         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3931         if (r) {
3932                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3933                 return r;
3934         }
3935
3936         /* init the CE partitions.  CE only used for gfx on CIK */
3937         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3938         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3939         radeon_ring_write(ring, 0xc000);
3940         radeon_ring_write(ring, 0xc000);
3941
3942         /* setup clear context state */
3943         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3944         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3945
3946         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3947         radeon_ring_write(ring, 0x80000000);
3948         radeon_ring_write(ring, 0x80000000);
3949
3950         for (i = 0; i < cik_default_size; i++)
3951                 radeon_ring_write(ring, cik_default_state[i]);
3952
3953         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3954         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3955
3956         /* set clear context state */
3957         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3958         radeon_ring_write(ring, 0);
3959
3960         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3961         radeon_ring_write(ring, 0x00000316);
3962         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3963         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3964
3965         radeon_ring_unlock_commit(rdev, ring);
3966
3967         return 0;
3968 }
3969
3970 /**
3971  * cik_cp_gfx_fini - stop the gfx ring
3972  *
3973  * @rdev: radeon_device pointer
3974  *
3975  * Stop the gfx ring and tear down the driver ring
3976  * info.
3977  */
3978 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3979 {
3980         cik_cp_gfx_enable(rdev, false);
3981         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3982 }
3983
3984 /**
3985  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3986  *
3987  * @rdev: radeon_device pointer
3988  *
3989  * Program the location and size of the gfx ring buffer
3990  * and test it to make sure it's working.
3991  * Returns 0 for success, error for failure.
3992  */
3993 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3994 {
3995         struct radeon_ring *ring;
3996         u32 tmp;
3997         u32 rb_bufsz;
3998         u64 rb_addr;
3999         int r;
4000
4001         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4002         if (rdev->family != CHIP_HAWAII)
4003                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4004
4005         /* Set the write pointer delay */
4006         WREG32(CP_RB_WPTR_DELAY, 0);
4007
4008         /* set the RB to use vmid 0 */
4009         WREG32(CP_RB_VMID, 0);
4010
4011         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4012
4013         /* ring 0 - compute and gfx */
4014         /* Set ring buffer size */
4015         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4016         rb_bufsz = order_base_2(ring->ring_size / 8);
4017         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4018 #ifdef __BIG_ENDIAN
4019         tmp |= BUF_SWAP_32BIT;
4020 #endif
4021         WREG32(CP_RB0_CNTL, tmp);
4022
4023         /* Initialize the ring buffer's read and write pointers */
4024         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4025         ring->wptr = 0;
4026         WREG32(CP_RB0_WPTR, ring->wptr);
4027
4028         /* set the wb address whether it's enabled or not */
4029         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4030         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4031
4032         /* scratch register shadowing is no longer supported */
4033         WREG32(SCRATCH_UMSK, 0);
4034
4035         if (!rdev->wb.enabled)
4036                 tmp |= RB_NO_UPDATE;
4037
4038         mdelay(1);
4039         WREG32(CP_RB0_CNTL, tmp);
4040
4041         rb_addr = ring->gpu_addr >> 8;
4042         WREG32(CP_RB0_BASE, rb_addr);
4043         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4044
4045         ring->rptr = RREG32(CP_RB0_RPTR);
4046
4047         /* start the ring */
4048         cik_cp_gfx_start(rdev);
4049         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4050         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4051         if (r) {
4052                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4053                 return r;
4054         }
4055
4056         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4057                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4058
4059         return 0;
4060 }
4061
4062 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4063                      struct radeon_ring *ring)
4064 {
4065         u32 rptr;
4066
4067         if (rdev->wb.enabled)
4068                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4069         else
4070                 rptr = RREG32(CP_RB0_RPTR);
4071
4072         return rptr;
4073 }
4074
4075 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4076                      struct radeon_ring *ring)
4077 {
4078         u32 wptr;
4079
4080         wptr = RREG32(CP_RB0_WPTR);
4081
4082         return wptr;
4083 }
4084
4085 void cik_gfx_set_wptr(struct radeon_device *rdev,
4086                       struct radeon_ring *ring)
4087 {
4088         WREG32(CP_RB0_WPTR, ring->wptr);
4089         (void)RREG32(CP_RB0_WPTR);
4090 }
4091
4092 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4093                          struct radeon_ring *ring)
4094 {
4095         u32 rptr;
4096
4097         if (rdev->wb.enabled) {
4098                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4099         } else {
4100                 mutex_lock(&rdev->srbm_mutex);
4101                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4102                 rptr = RREG32(CP_HQD_PQ_RPTR);
4103                 cik_srbm_select(rdev, 0, 0, 0, 0);
4104                 mutex_unlock(&rdev->srbm_mutex);
4105         }
4106
4107         return rptr;
4108 }
4109
4110 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4111                          struct radeon_ring *ring)
4112 {
4113         u32 wptr;
4114
4115         if (rdev->wb.enabled) {
4116                 /* XXX check if swapping is necessary on BE */
4117                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4118         } else {
4119                 mutex_lock(&rdev->srbm_mutex);
4120                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4121                 wptr = RREG32(CP_HQD_PQ_WPTR);
4122                 cik_srbm_select(rdev, 0, 0, 0, 0);
4123                 mutex_unlock(&rdev->srbm_mutex);
4124         }
4125
4126         return wptr;
4127 }
4128
4129 void cik_compute_set_wptr(struct radeon_device *rdev,
4130                           struct radeon_ring *ring)
4131 {
4132         /* XXX check if swapping is necessary on BE */
4133         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4134         WDOORBELL32(ring->doorbell_index, ring->wptr);
4135 }
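
/*
 * Note: unlike the gfx ring, whose write pointer is latched by writing
 * CP_RB0_WPTR directly (cik_gfx_set_wptr above), the compute rings update
 * the writeback copy of the pointer and then ring the queue's doorbell
 * (cik_compute_set_wptr above).
 */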
4136
4137 /**
4138  * cik_cp_compute_enable - enable/disable the compute CP MEs
4139  *
4140  * @rdev: radeon_device pointer
4141  * @enable: enable or disable the MEs
4142  *
4143  * Halts or unhalts the compute MEs.
4144  */
4145 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4146 {
4147         if (enable)
4148                 WREG32(CP_MEC_CNTL, 0);
4149         else {
4150                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4151                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4152                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4153         }
4154         udelay(50);
4155 }
4156
4157 /**
4158  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4159  *
4160  * @rdev: radeon_device pointer
4161  *
4162  * Loads the compute MEC1 and MEC2 ucode.
4163  * Returns 0 for success, -EINVAL if the ucode is not available.
4164  */
4165 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4166 {
4167         const __be32 *fw_data;
4168         int i;
4169
4170         if (!rdev->mec_fw)
4171                 return -EINVAL;
4172
4173         cik_cp_compute_enable(rdev, false);
4174
4175         /* MEC1 */
4176         fw_data = (const __be32 *)rdev->mec_fw->data;
4177         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4178         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4179                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4180         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4181
4182         if (rdev->family == CHIP_KAVERI) {
4183                 /* MEC2 */
4184                 fw_data = (const __be32 *)rdev->mec_fw->data;
4185                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4186                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4187                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4188                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4189         }
4190
4191         return 0;
4192 }
4193
4194 /**
4195  * cik_cp_compute_start - start the compute queues
4196  *
4197  * @rdev: radeon_device pointer
4198  *
4199  * Enable the compute queues.
4200  * Returns 0 for success, error for failure.
4201  */
4202 static int cik_cp_compute_start(struct radeon_device *rdev)
4203 {
4204         cik_cp_compute_enable(rdev, true);
4205
4206         return 0;
4207 }
4208
4209 /**
4210  * cik_cp_compute_fini - stop the compute queues
4211  *
4212  * @rdev: radeon_device pointer
4213  *
4214  * Stop the compute queues and tear down the driver queue
4215  * info.
4216  */
4217 static void cik_cp_compute_fini(struct radeon_device *rdev)
4218 {
4219         int i, idx, r;
4220
4221         cik_cp_compute_enable(rdev, false);
4222
4223         for (i = 0; i < 2; i++) {
4224                 if (i == 0)
4225                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4226                 else
4227                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4228
4229                 if (rdev->ring[idx].mqd_obj) {
4230                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4231                         if (unlikely(r != 0))
4232                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4233
4234                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4235                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4236
4237                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4238                         rdev->ring[idx].mqd_obj = NULL;
4239                 }
4240         }
4241 }
4242
4243 static void cik_mec_fini(struct radeon_device *rdev)
4244 {
4245         int r;
4246
4247         if (rdev->mec.hpd_eop_obj) {
4248                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4249                 if (unlikely(r != 0))
4250                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4251                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4252                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4253
4254                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4255                 rdev->mec.hpd_eop_obj = NULL;
4256         }
4257 }
4258
4259 #define MEC_HPD_SIZE 2048
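
/*
 * Note: with MEC_HPD_SIZE = 2048 bytes per pipe, cik_mec_init() below sizes
 * the HPD EOP buffer as num_mec * num_pipe * MEC_HPD_SIZE * 2, i.e.
 * 2 * 4 * 2048 * 2 = 32 KiB on Kaveri and 1 * 4 * 2048 * 2 = 16 KiB on the
 * other CIK parts.
 */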
4260
4261 static int cik_mec_init(struct radeon_device *rdev)
4262 {
4263         int r;
4264         u32 *hpd;
4265
4266         /*
4267          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4268          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4269          */
4270         if (rdev->family == CHIP_KAVERI)
4271                 rdev->mec.num_mec = 2;
4272         else
4273                 rdev->mec.num_mec = 1;
4274         rdev->mec.num_pipe = 4;
4275         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4276
4277         if (rdev->mec.hpd_eop_obj == NULL) {
4278                 r = radeon_bo_create(rdev,
4279                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4280                                      PAGE_SIZE, true,
4281                                      RADEON_GEM_DOMAIN_GTT, NULL,
4282                                      &rdev->mec.hpd_eop_obj);
4283                 if (r) {
4284                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4285                         return r;
4286                 }
4287         }
4288
4289         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4290         if (unlikely(r != 0)) {
4291                 cik_mec_fini(rdev);
4292                 return r;
4293         }
4294         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4295                           &rdev->mec.hpd_eop_gpu_addr);
4296         if (r) {
4297                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4298                 cik_mec_fini(rdev);
4299                 return r;
4300         }
4301         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4302         if (r) {
4303                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4304                 cik_mec_fini(rdev);
4305                 return r;
4306         }
4307
4308         /* clear memory.  Not sure if this is required or not */
4309         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4310
4311         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4312         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4313
4314         return 0;
4315 }
4316
4317 struct hqd_registers
4318 {
4319         u32 cp_mqd_base_addr;
4320         u32 cp_mqd_base_addr_hi;
4321         u32 cp_hqd_active;
4322         u32 cp_hqd_vmid;
4323         u32 cp_hqd_persistent_state;
4324         u32 cp_hqd_pipe_priority;
4325         u32 cp_hqd_queue_priority;
4326         u32 cp_hqd_quantum;
4327         u32 cp_hqd_pq_base;
4328         u32 cp_hqd_pq_base_hi;
4329         u32 cp_hqd_pq_rptr;
4330         u32 cp_hqd_pq_rptr_report_addr;
4331         u32 cp_hqd_pq_rptr_report_addr_hi;
4332         u32 cp_hqd_pq_wptr_poll_addr;
4333         u32 cp_hqd_pq_wptr_poll_addr_hi;
4334         u32 cp_hqd_pq_doorbell_control;
4335         u32 cp_hqd_pq_wptr;
4336         u32 cp_hqd_pq_control;
4337         u32 cp_hqd_ib_base_addr;
4338         u32 cp_hqd_ib_base_addr_hi;
4339         u32 cp_hqd_ib_rptr;
4340         u32 cp_hqd_ib_control;
4341         u32 cp_hqd_iq_timer;
4342         u32 cp_hqd_iq_rptr;
4343         u32 cp_hqd_dequeue_request;
4344         u32 cp_hqd_dma_offload;
4345         u32 cp_hqd_sema_cmd;
4346         u32 cp_hqd_msg_type;
4347         u32 cp_hqd_atomic0_preop_lo;
4348         u32 cp_hqd_atomic0_preop_hi;
4349         u32 cp_hqd_atomic1_preop_lo;
4350         u32 cp_hqd_atomic1_preop_hi;
4351         u32 cp_hqd_hq_scheduler0;
4352         u32 cp_hqd_hq_scheduler1;
4353         u32 cp_mqd_control;
4354 };
4355
4356 struct bonaire_mqd
4357 {
4358         u32 header;
4359         u32 dispatch_initiator;
4360         u32 dimensions[3];
4361         u32 start_idx[3];
4362         u32 num_threads[3];
4363         u32 pipeline_stat_enable;
4364         u32 perf_counter_enable;
4365         u32 pgm[2];
4366         u32 tba[2];
4367         u32 tma[2];
4368         u32 pgm_rsrc[2];
4369         u32 vmid;
4370         u32 resource_limits;
4371         u32 static_thread_mgmt01[2];
4372         u32 tmp_ring_size;
4373         u32 static_thread_mgmt23[2];
4374         u32 restart[3];
4375         u32 thread_trace_enable;
4376         u32 reserved1;
4377         u32 user_data[16];
4378         u32 vgtcs_invoke_count[2];
4379         struct hqd_registers queue_state;
4380         u32 dequeue_cntr;
4381         u32 interrupt_queue[64];
4382 };
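
/*
 * Note: bonaire_mqd is the memory queue descriptor written into a GTT
 * buffer object; its queue_state member shadows the CP_HQD_* registers
 * that cik_cp_compute_resume() below programs for each compute queue.
 */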
4383
4384 /**
4385  * cik_cp_compute_resume - setup the compute queue registers
4386  *
4387  * @rdev: radeon_device pointer
4388  *
4389  * Program the compute queues and test them to make sure they
4390  * are working.
4391  * Returns 0 for success, error for failure.
4392  */
4393 static int cik_cp_compute_resume(struct radeon_device *rdev)
4394 {
4395         int r, i, j, idx;
4396         u32 tmp;
4397         bool use_doorbell = true;
4398         u64 hqd_gpu_addr;
4399         u64 mqd_gpu_addr;
4400         u64 eop_gpu_addr;
4401         u64 wb_gpu_addr;
4402         u32 *buf;
4403         struct bonaire_mqd *mqd;
4404
4405         r = cik_cp_compute_start(rdev);
4406         if (r)
4407                 return r;
4408
4409         /* fix up chicken bits */
4410         tmp = RREG32(CP_CPF_DEBUG);
4411         tmp |= (1 << 23);
4412         WREG32(CP_CPF_DEBUG, tmp);
4413
4414         /* init the pipes */
4415         mutex_lock(&rdev->srbm_mutex);
4416         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4417                 int me = (i < 4) ? 1 : 2;
4418                 int pipe = (i < 4) ? i : (i - 4);
4419
4420                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4421
4422                 cik_srbm_select(rdev, me, pipe, 0, 0);
4423
4424                 /* write the EOP addr */
4425                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4426                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4427
4428                 /* set the VMID assigned */
4429                 WREG32(CP_HPD_EOP_VMID, 0);
4430
4431                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
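		/* e.g. order_base_2(MEC_HPD_SIZE / 8) = order_base_2(256) = 8,
		 * so the HW sees 2^(8+1) = 512 dwords = 2048 bytes
		 */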
4432                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4433                 tmp &= ~EOP_SIZE_MASK;
4434                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4435                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4436         }
4437         cik_srbm_select(rdev, 0, 0, 0, 0);
4438         mutex_unlock(&rdev->srbm_mutex);
4439
4440         /* init the queues.  Just two for now. */
4441         for (i = 0; i < 2; i++) {
4442                 if (i == 0)
4443                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4444                 else
4445                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4446
4447                 if (rdev->ring[idx].mqd_obj == NULL) {
4448                         r = radeon_bo_create(rdev,
4449                                              sizeof(struct bonaire_mqd),
4450                                              PAGE_SIZE, true,
4451                                              RADEON_GEM_DOMAIN_GTT, NULL,
4452                                              &rdev->ring[idx].mqd_obj);
4453                         if (r) {
4454                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4455                                 return r;
4456                         }
4457                 }
4458
4459                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4460                 if (unlikely(r != 0)) {
4461                         cik_cp_compute_fini(rdev);
4462                         return r;
4463                 }
4464                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4465                                   &mqd_gpu_addr);
4466                 if (r) {
4467                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4468                         cik_cp_compute_fini(rdev);
4469                         return r;
4470                 }
4471                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4472                 if (r) {
4473                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4474                         cik_cp_compute_fini(rdev);
4475                         return r;
4476                 }
4477
4478                 /* init the mqd struct */
4479                 memset(buf, 0, sizeof(struct bonaire_mqd));
4480
4481                 mqd = (struct bonaire_mqd *)buf;
4482                 mqd->header = 0xC0310800;
4483                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4484                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4485                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4486                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4487
4488                 mutex_lock(&rdev->srbm_mutex);
4489                 cik_srbm_select(rdev, rdev->ring[idx].me,
4490                                 rdev->ring[idx].pipe,
4491                                 rdev->ring[idx].queue, 0);
4492
4493                 /* disable wptr polling */
4494                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4495                 tmp &= ~WPTR_POLL_EN;
4496                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4497
4498                 /* enable doorbell? */
4499                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4500                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4501                 if (use_doorbell)
4502                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4503                 else
4504                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4505                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4506                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4507
4508                 /* disable the queue if it's active */
4509                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4510                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4511                 mqd->queue_state.cp_hqd_pq_wptr = 0;
4512                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4513                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4514                         for (j = 0; j < rdev->usec_timeout; j++) {
4515                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4516                                         break;
4517                                 udelay(1);
4518                         }
4519                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4520                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4521                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4522                 }
4523
4524                 /* set the pointer to the MQD */
4525                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4526                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4527                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4528                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4529                 /* set MQD vmid to 0 */
4530                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4531                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4532                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4533
4534                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4535                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4536                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4537                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4538                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4539                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4540
4541                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4542                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4543                 mqd->queue_state.cp_hqd_pq_control &=
4544                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4545
4546                 mqd->queue_state.cp_hqd_pq_control |=
4547                         order_base_2(rdev->ring[idx].ring_size / 8);
4548                 mqd->queue_state.cp_hqd_pq_control |=
4549                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4550 #ifdef __BIG_ENDIAN
4551                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4552 #endif
4553                 mqd->queue_state.cp_hqd_pq_control &=
4554                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4555                 mqd->queue_state.cp_hqd_pq_control |=
4556                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4557                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4558
4559                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4560                 if (i == 0)
4561                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4562                 else
4563                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4564                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4565                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4566                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4567                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4568                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4569
4570                 /* set the wb address whether it's enabled or not */
4571                 if (i == 0)
4572                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4573                 else
4574                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4575                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4576                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4577                         upper_32_bits(wb_gpu_addr) & 0xffff;
4578                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4579                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4580                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4581                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4582
4583                 /* enable the doorbell if requested */
4584                 if (use_doorbell) {
4585                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4586                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4587                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4588                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4589                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4590                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4591                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4592                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4593
4594                 } else {
4595                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4596                 }
4597                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4598                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4599
4600                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4601                 rdev->ring[idx].wptr = 0;
4602                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4603                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4604                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4605                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4606
4607                 /* set the vmid for the queue */
4608                 mqd->queue_state.cp_hqd_vmid = 0;
4609                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4610
4611                 /* activate the queue */
4612                 mqd->queue_state.cp_hqd_active = 1;
4613                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4614
4615                 cik_srbm_select(rdev, 0, 0, 0, 0);
4616                 mutex_unlock(&rdev->srbm_mutex);
4617
4618                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4619                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4620
4621                 rdev->ring[idx].ready = true;
4622                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4623                 if (r)
4624                         rdev->ring[idx].ready = false;
4625         }
4626
4627         return 0;
4628 }
4629
4630 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4631 {
4632         cik_cp_gfx_enable(rdev, enable);
4633         cik_cp_compute_enable(rdev, enable);
4634 }
4635
4636 static int cik_cp_load_microcode(struct radeon_device *rdev)
4637 {
4638         int r;
4639
4640         r = cik_cp_gfx_load_microcode(rdev);
4641         if (r)
4642                 return r;
4643         r = cik_cp_compute_load_microcode(rdev);
4644         if (r)
4645                 return r;
4646
4647         return 0;
4648 }
4649
4650 static void cik_cp_fini(struct radeon_device *rdev)
4651 {
4652         cik_cp_gfx_fini(rdev);
4653         cik_cp_compute_fini(rdev);
4654 }
4655
4656 static int cik_cp_resume(struct radeon_device *rdev)
4657 {
4658         int r;
4659
4660         cik_enable_gui_idle_interrupt(rdev, false);
4661
4662         r = cik_cp_load_microcode(rdev);
4663         if (r)
4664                 return r;
4665
4666         r = cik_cp_gfx_resume(rdev);
4667         if (r)
4668                 return r;
4669         r = cik_cp_compute_resume(rdev);
4670         if (r)
4671                 return r;
4672
4673         cik_enable_gui_idle_interrupt(rdev, true);
4674
4675         return 0;
4676 }
4677
4678 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4679 {
4680         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4681                 RREG32(GRBM_STATUS));
4682         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4683                 RREG32(GRBM_STATUS2));
4684         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4685                 RREG32(GRBM_STATUS_SE0));
4686         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4687                 RREG32(GRBM_STATUS_SE1));
4688         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4689                 RREG32(GRBM_STATUS_SE2));
4690         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4691                 RREG32(GRBM_STATUS_SE3));
4692         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4693                 RREG32(SRBM_STATUS));
4694         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4695                 RREG32(SRBM_STATUS2));
4696         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4697                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4698         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4699                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4700         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4701         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4702                  RREG32(CP_STALLED_STAT1));
4703         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4704                  RREG32(CP_STALLED_STAT2));
4705         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4706                  RREG32(CP_STALLED_STAT3));
4707         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4708                  RREG32(CP_CPF_BUSY_STAT));
4709         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4710                  RREG32(CP_CPF_STALLED_STAT1));
4711         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4712         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4713         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4714                  RREG32(CP_CPC_STALLED_STAT1));
4715         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4716 }
4717
4718 /**
4719  * cik_gpu_check_soft_reset - check which blocks are busy
4720  *
4721  * @rdev: radeon_device pointer
4722  *
4723  * Check which blocks are busy and return the relevant reset
4724  * mask to be used by cik_gpu_soft_reset().
4725  * Returns a mask of the blocks to be reset.
4726  */
4727 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4728 {
4729         u32 reset_mask = 0;
4730         u32 tmp;
4731
4732         /* GRBM_STATUS */
4733         tmp = RREG32(GRBM_STATUS);
4734         if (tmp & (PA_BUSY | SC_BUSY |
4735                    BCI_BUSY | SX_BUSY |
4736                    TA_BUSY | VGT_BUSY |
4737                    DB_BUSY | CB_BUSY |
4738                    GDS_BUSY | SPI_BUSY |
4739                    IA_BUSY | IA_BUSY_NO_DMA))
4740                 reset_mask |= RADEON_RESET_GFX;
4741
4742         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4743                 reset_mask |= RADEON_RESET_CP;
4744
4745         /* GRBM_STATUS2 */
4746         tmp = RREG32(GRBM_STATUS2);
4747         if (tmp & RLC_BUSY)
4748                 reset_mask |= RADEON_RESET_RLC;
4749
4750         /* SDMA0_STATUS_REG */
4751         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4752         if (!(tmp & SDMA_IDLE))
4753                 reset_mask |= RADEON_RESET_DMA;
4754
4755         /* SDMA1_STATUS_REG */
4756         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4757         if (!(tmp & SDMA_IDLE))
4758                 reset_mask |= RADEON_RESET_DMA1;
4759
4760         /* SRBM_STATUS2 */
4761         tmp = RREG32(SRBM_STATUS2);
4762         if (tmp & SDMA_BUSY)
4763                 reset_mask |= RADEON_RESET_DMA;
4764
4765         if (tmp & SDMA1_BUSY)
4766                 reset_mask |= RADEON_RESET_DMA1;
4767
4768         /* SRBM_STATUS */
4769         tmp = RREG32(SRBM_STATUS);
4770
4771         if (tmp & IH_BUSY)
4772                 reset_mask |= RADEON_RESET_IH;
4773
4774         if (tmp & SEM_BUSY)
4775                 reset_mask |= RADEON_RESET_SEM;
4776
4777         if (tmp & GRBM_RQ_PENDING)
4778                 reset_mask |= RADEON_RESET_GRBM;
4779
4780         if (tmp & VMC_BUSY)
4781                 reset_mask |= RADEON_RESET_VMC;
4782
4783         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4784                    MCC_BUSY | MCD_BUSY))
4785                 reset_mask |= RADEON_RESET_MC;
4786
4787         if (evergreen_is_display_hung(rdev))
4788                 reset_mask |= RADEON_RESET_DISPLAY;
4789
4790         /* Skip MC reset as it's most likely not hung, just busy */
4791         if (reset_mask & RADEON_RESET_MC) {
4792                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4793                 reset_mask &= ~RADEON_RESET_MC;
4794         }
4795
4796         return reset_mask;
4797 }
4798
4799 /**
4800  * cik_gpu_soft_reset - soft reset GPU
4801  *
4802  * @rdev: radeon_device pointer
4803  * @reset_mask: mask of which blocks to reset
4804  *
4805  * Soft reset the blocks specified in @reset_mask.
4806  */
4807 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4808 {
4809         struct evergreen_mc_save save;
4810         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4811         u32 tmp;
4812
4813         if (reset_mask == 0)
4814                 return;
4815
4816         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4817
4818         cik_print_gpu_status_regs(rdev);
4819         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4820                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4821         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4822                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4823
4824         /* disable CG/PG */
4825         cik_fini_pg(rdev);
4826         cik_fini_cg(rdev);
4827
4828         /* stop the rlc */
4829         cik_rlc_stop(rdev);
4830
4831         /* Disable GFX parsing/prefetching */
4832         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4833
4834         /* Disable MEC parsing/prefetching */
4835         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4836
4837         if (reset_mask & RADEON_RESET_DMA) {
4838                 /* sdma0 */
4839                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4840                 tmp |= SDMA_HALT;
4841                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4842         }
4843         if (reset_mask & RADEON_RESET_DMA1) {
4844                 /* sdma1 */
4845                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4846                 tmp |= SDMA_HALT;
4847                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4848         }
4849
4850         evergreen_mc_stop(rdev, &save);
4851         if (evergreen_mc_wait_for_idle(rdev)) {
4852                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4853         }
4854
4855         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4856                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4857
4858         if (reset_mask & RADEON_RESET_CP) {
4859                 grbm_soft_reset |= SOFT_RESET_CP;
4860
4861                 srbm_soft_reset |= SOFT_RESET_GRBM;
4862         }
4863
4864         if (reset_mask & RADEON_RESET_DMA)
4865                 srbm_soft_reset |= SOFT_RESET_SDMA;
4866
4867         if (reset_mask & RADEON_RESET_DMA1)
4868                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4869
4870         if (reset_mask & RADEON_RESET_DISPLAY)
4871                 srbm_soft_reset |= SOFT_RESET_DC;
4872
4873         if (reset_mask & RADEON_RESET_RLC)
4874                 grbm_soft_reset |= SOFT_RESET_RLC;
4875
4876         if (reset_mask & RADEON_RESET_SEM)
4877                 srbm_soft_reset |= SOFT_RESET_SEM;
4878
4879         if (reset_mask & RADEON_RESET_IH)
4880                 srbm_soft_reset |= SOFT_RESET_IH;
4881
4882         if (reset_mask & RADEON_RESET_GRBM)
4883                 srbm_soft_reset |= SOFT_RESET_GRBM;
4884
4885         if (reset_mask & RADEON_RESET_VMC)
4886                 srbm_soft_reset |= SOFT_RESET_VMC;
4887
4888         if (!(rdev->flags & RADEON_IS_IGP)) {
4889                 if (reset_mask & RADEON_RESET_MC)
4890                         srbm_soft_reset |= SOFT_RESET_MC;
4891         }
4892
4893         if (grbm_soft_reset) {
4894                 tmp = RREG32(GRBM_SOFT_RESET);
4895                 tmp |= grbm_soft_reset;
4896                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4897                 WREG32(GRBM_SOFT_RESET, tmp);
4898                 tmp = RREG32(GRBM_SOFT_RESET);
4899
4900                 udelay(50);
4901
4902                 tmp &= ~grbm_soft_reset;
4903                 WREG32(GRBM_SOFT_RESET, tmp);
4904                 tmp = RREG32(GRBM_SOFT_RESET);
4905         }
4906
4907         if (srbm_soft_reset) {
4908                 tmp = RREG32(SRBM_SOFT_RESET);
4909                 tmp |= srbm_soft_reset;
4910                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4911                 WREG32(SRBM_SOFT_RESET, tmp);
4912                 tmp = RREG32(SRBM_SOFT_RESET);
4913
4914                 udelay(50);
4915
4916                 tmp &= ~srbm_soft_reset;
4917                 WREG32(SRBM_SOFT_RESET, tmp);
4918                 tmp = RREG32(SRBM_SOFT_RESET);
4919         }
4920
4921         /* Wait a little for things to settle down */
4922         udelay(50);
4923
4924         evergreen_mc_resume(rdev, &save);
4925         udelay(50);
4926
4927         cik_print_gpu_status_regs(rdev);
4928 }
4929
4930 struct kv_reset_save_regs {
4931         u32 gmcon_reng_execute;
4932         u32 gmcon_misc;
4933         u32 gmcon_misc3;
4934 };
4935
4936 static void kv_save_regs_for_reset(struct radeon_device *rdev,
4937                                    struct kv_reset_save_regs *save)
4938 {
4939         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
4940         save->gmcon_misc = RREG32(GMCON_MISC);
4941         save->gmcon_misc3 = RREG32(GMCON_MISC3);
4942
4943         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
4944         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
4945                                                 STCTRL_STUTTER_EN));
4946 }
4947
4948 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4949                                       struct kv_reset_save_regs *save)
4950 {
4951         int i;
4952
4953         WREG32(GMCON_PGFSM_WRITE, 0);
4954         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
4955
4956         for (i = 0; i < 5; i++)
4957                 WREG32(GMCON_PGFSM_WRITE, 0);
4958
4959         WREG32(GMCON_PGFSM_WRITE, 0);
4960         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
4961
4962         for (i = 0; i < 5; i++)
4963                 WREG32(GMCON_PGFSM_WRITE, 0);
4964
4965         WREG32(GMCON_PGFSM_WRITE, 0x210000);
4966         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
4967
4968         for (i = 0; i < 5; i++)
4969                 WREG32(GMCON_PGFSM_WRITE, 0);
4970
4971         WREG32(GMCON_PGFSM_WRITE, 0x21003);
4972         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
4973
4974         for (i = 0; i < 5; i++)
4975                 WREG32(GMCON_PGFSM_WRITE, 0);
4976
4977         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
4978         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
4979
4980         for (i = 0; i < 5; i++)
4981                 WREG32(GMCON_PGFSM_WRITE, 0);
4982
4983         WREG32(GMCON_PGFSM_WRITE, 0);
4984         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
4985
4986         for (i = 0; i < 5; i++)
4987                 WREG32(GMCON_PGFSM_WRITE, 0);
4988
4989         WREG32(GMCON_PGFSM_WRITE, 0x420000);
4990         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
4991
4992         for (i = 0; i < 5; i++)
4993                 WREG32(GMCON_PGFSM_WRITE, 0);
4994
4995         WREG32(GMCON_PGFSM_WRITE, 0x120202);
4996         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
4997
4998         for (i = 0; i < 5; i++)
4999                 WREG32(GMCON_PGFSM_WRITE, 0);
5000
5001         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5002         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5003
5004         for (i = 0; i < 5; i++)
5005                 WREG32(GMCON_PGFSM_WRITE, 0);
5006
5007         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5008         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5009
5010         for (i = 0; i < 5; i++)
5011                 WREG32(GMCON_PGFSM_WRITE, 0);
5012
5013         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5014         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5015
5016         WREG32(GMCON_MISC3, save->gmcon_misc3);
5017         WREG32(GMCON_MISC, save->gmcon_misc);
5018         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5019 }
5020
5021 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5022 {
5023         struct evergreen_mc_save save;
5024         struct kv_reset_save_regs kv_save = { 0 };
5025         u32 tmp, i;
5026
5027         dev_info(rdev->dev, "GPU pci config reset\n");
5028
5029         /* disable dpm? */
5030
5031         /* disable cg/pg */
5032         cik_fini_pg(rdev);
5033         cik_fini_cg(rdev);
5034
5035         /* Disable GFX parsing/prefetching */
5036         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5037
5038         /* Disable MEC parsing/prefetching */
5039         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5040
5041         /* sdma0 */
5042         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5043         tmp |= SDMA_HALT;
5044         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5045         /* sdma1 */
5046         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5047         tmp |= SDMA_HALT;
5048         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5049         /* XXX other engines? */
5050
5051         /* halt the rlc, disable cp internal ints */
5052         cik_rlc_stop(rdev);
5053
5054         udelay(50);
5055
5056         /* disable mem access */
5057         evergreen_mc_stop(rdev, &save);
5058         if (evergreen_mc_wait_for_idle(rdev)) {
5059                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5060         }
5061
5062         if (rdev->flags & RADEON_IS_IGP)
5063                 kv_save_regs_for_reset(rdev, &kv_save);
5064
5065         /* disable BM */
5066         pci_clear_master(rdev->pdev);
5067         /* reset */
5068         radeon_pci_config_reset(rdev);
5069
5070         udelay(100);
5071
5072         /* wait for asic to come out of reset */
5073         for (i = 0; i < rdev->usec_timeout; i++) {
5074                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5075                         break;
5076                 udelay(1);
5077         }
5078
5079         /* does asic init need to be run first??? */
5080         if (rdev->flags & RADEON_IS_IGP)
5081                 kv_restore_regs_for_reset(rdev, &kv_save);
5082 }
5083
5084 /**
5085  * cik_asic_reset - soft reset GPU
5086  *
5087  * @rdev: radeon_device pointer
5088  *
5089  * Look up which blocks are hung and attempt
5090  * to reset them.
5091  * Returns 0 for success.
5092  */
5093 int cik_asic_reset(struct radeon_device *rdev)
5094 {
5095         u32 reset_mask;
5096
5097         reset_mask = cik_gpu_check_soft_reset(rdev);
5098
5099         if (reset_mask)
5100                 r600_set_bios_scratch_engine_hung(rdev, true);
5101
5102         /* try soft reset */
5103         cik_gpu_soft_reset(rdev, reset_mask);
5104
5105         reset_mask = cik_gpu_check_soft_reset(rdev);
5106
5107         /* try pci config reset */
5108         if (reset_mask && radeon_hard_reset)
5109                 cik_gpu_pci_config_reset(rdev);
5110
5111         reset_mask = cik_gpu_check_soft_reset(rdev);
5112
5113         if (!reset_mask)
5114                 r600_set_bios_scratch_engine_hung(rdev, false);
5115
5116         return 0;
5117 }
5118
5119 /**
5120  * cik_gfx_is_lockup - check if the 3D engine is locked up
5121  *
5122  * @rdev: radeon_device pointer
5123  * @ring: radeon_ring structure holding ring information
5124  *
5125  * Check if the 3D engine is locked up (CIK).
5126  * Returns true if the engine is locked, false if not.
5127  */
5128 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5129 {
5130         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5131
5132         if (!(reset_mask & (RADEON_RESET_GFX |
5133                             RADEON_RESET_COMPUTE |
5134                             RADEON_RESET_CP))) {
5135                 radeon_ring_lockup_update(ring);
5136                 return false;
5137         }
5138         /* force CP activities */
5139         radeon_ring_force_activity(rdev, ring);
5140         return radeon_ring_test_lockup(rdev, ring);
5141 }
5142
5143 /* MC */
5144 /**
5145  * cik_mc_program - program the GPU memory controller
5146  *
5147  * @rdev: radeon_device pointer
5148  *
5149  * Set the location of vram, gart, and AGP in the GPU's
5150  * physical address space (CIK).
5151  */
5152 static void cik_mc_program(struct radeon_device *rdev)
5153 {
5154         struct evergreen_mc_save save;
5155         u32 tmp;
5156         int i, j;
5157
5158         /* Initialize HDP */
5159         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5160                 WREG32((0x2c14 + j), 0x00000000);
5161                 WREG32((0x2c18 + j), 0x00000000);
5162                 WREG32((0x2c1c + j), 0x00000000);
5163                 WREG32((0x2c20 + j), 0x00000000);
5164                 WREG32((0x2c24 + j), 0x00000000);
5165         }
5166         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5167
5168         evergreen_mc_stop(rdev, &save);
5169         if (radeon_mc_wait_for_idle(rdev)) {
5170                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5171         }
5172         /* Lockout access through VGA aperture*/
5173         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5174         /* Update configuration */
5175         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5176                rdev->mc.vram_start >> 12);
5177         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5178                rdev->mc.vram_end >> 12);
5179         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5180                rdev->vram_scratch.gpu_addr >> 12);
5181         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5182         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5183         WREG32(MC_VM_FB_LOCATION, tmp);
5184         /* XXX double check these! */
5185         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5186         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5187         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5188         WREG32(MC_VM_AGP_BASE, 0);
5189         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5190         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5191         if (radeon_mc_wait_for_idle(rdev)) {
5192                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5193         }
5194         evergreen_mc_resume(rdev, &save);
5195         /* we need to own VRAM, so turn off the VGA renderer here
5196          * to stop it overwriting our objects */
5197         rv515_vga_render_disable(rdev);
5198 }
5199
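/*
 * Editorial note, a hedged worked example for cik_mc_program() above:
 * MC_VM_FB_LOCATION packs the framebuffer range in 16 MB units; bits
 * 31:16 hold (vram_end >> 24) and bits 15:0 hold (vram_start >> 24).
 * With vram_start = 0 and vram_end = 0xFFFFFFFF (4 GB - 1), the value
 * written would be (0xFF << 16) | 0x00 = 0x00FF0000.
 */
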
5200 /**
5201  * cik_mc_init - initialize the memory controller driver params
5202  *
5203  * @rdev: radeon_device pointer
5204  *
5205  * Look up the amount of vram, vram width, and decide how to place
5206  * vram and gart within the GPU's physical address space (CIK).
5207  * Returns 0 for success.
5208  */
5209 static int cik_mc_init(struct radeon_device *rdev)
5210 {
5211         u32 tmp;
5212         int chansize, numchan;
5213
5214         /* Get VRAM information */
5215         rdev->mc.vram_is_ddr = true;
5216         tmp = RREG32(MC_ARB_RAMCFG);
5217         if (tmp & CHANSIZE_MASK) {
5218                 chansize = 64;
5219         } else {
5220                 chansize = 32;
5221         }
5222         tmp = RREG32(MC_SHARED_CHMAP);
5223         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5224         case 0:
5225         default:
5226                 numchan = 1;
5227                 break;
5228         case 1:
5229                 numchan = 2;
5230                 break;
5231         case 2:
5232                 numchan = 4;
5233                 break;
5234         case 3:
5235                 numchan = 8;
5236                 break;
5237         case 4:
5238                 numchan = 3;
5239                 break;
5240         case 5:
5241                 numchan = 6;
5242                 break;
5243         case 6:
5244                 numchan = 10;
5245                 break;
5246         case 7:
5247                 numchan = 12;
5248                 break;
5249         case 8:
5250                 numchan = 16;
5251                 break;
5252         }
5253         rdev->mc.vram_width = numchan * chansize;
5254         /* Could aper size report 0 ? */
5255         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5256         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5257         /* size in MB on cik */
5258         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5259         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5260         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5261         si_vram_gtt_location(rdev, &rdev->mc);
5262         radeon_update_bandwidth_info(rdev);
5263
5264         return 0;
5265 }
5266
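/*
 * Editorial note, a hedged worked example for cik_mc_init() above:
 * vram_width is simply channel count times channel size, so
 * MC_SHARED_CHMAP reporting NOOFCHAN = 3 with 64-bit channels gives
 * 8 * 64 = a 512-bit bus, while NOOFCHAN = 1 with 32-bit channels gives
 * 2 * 32 = 64 bits.
 */
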
5267 /*
5268  * GART
5269  * VMID 0 is the physical GPU address space as used by the kernel.
5270  * VMIDs 1-15 are used for userspace clients and are handled
5271  * by the radeon vm/hsa code.
5272  */
5273 /**
5274  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5275  *
5276  * @rdev: radeon_device pointer
5277  *
5278  * Flush the TLB for the VMID 0 page table (CIK).
5279  */
5280 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5281 {
5282         /* flush hdp cache */
5283         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5284
5285         /* bits 0-15 are the VM contexts 0-15 */
5286         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5287 }
5288
5289 /**
5290  * cik_pcie_gart_enable - gart enable
5291  *
5292  * @rdev: radeon_device pointer
5293  *
5294  * This sets up the TLBs, programs the page tables for VMID0,
5295  * sets up the hw for VMIDs 1-15 which are allocated on
5296  * demand, and sets up the global locations for the LDS, GDS,
5297  * and GPUVM for FSA64 clients (CIK).
5298  * Returns 0 for success, errors for failure.
5299  */
5300 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5301 {
5302         int r, i;
5303
5304         if (rdev->gart.robj == NULL) {
5305                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5306                 return -EINVAL;
5307         }
5308         r = radeon_gart_table_vram_pin(rdev);
5309         if (r)
5310                 return r;
5311         radeon_gart_restore(rdev);
5312         /* Setup TLB control */
5313         WREG32(MC_VM_MX_L1_TLB_CNTL,
5314                (0xA << 7) |
5315                ENABLE_L1_TLB |
5316                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5317                ENABLE_ADVANCED_DRIVER_MODEL |
5318                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5319         /* Setup L2 cache */
5320         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5321                ENABLE_L2_FRAGMENT_PROCESSING |
5322                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5323                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5324                EFFECTIVE_L2_QUEUE_SIZE(7) |
5325                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5326         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5327         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5328                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5329         /* setup context0 */
5330         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5331         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5332         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5333         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5334                         (u32)(rdev->dummy_page.addr >> 12));
5335         WREG32(VM_CONTEXT0_CNTL2, 0);
5336         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5337                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5338
5339         WREG32(0x15D4, 0);
5340         WREG32(0x15D8, 0);
5341         WREG32(0x15DC, 0);
5342
5343         /* empty context1-15 */
5344         /* FIXME start with 4G, once using 2 level pt switch to full
5345          * vm size space
5346          */
5347         /* set vm size, must be a multiple of 4 */
5348         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5349         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5350         for (i = 1; i < 16; i++) {
5351                 if (i < 8)
5352                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5353                                rdev->gart.table_addr >> 12);
5354                 else
5355                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5356                                rdev->gart.table_addr >> 12);
5357         }
5358
5359         /* enable context1-15 */
5360         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5361                (u32)(rdev->dummy_page.addr >> 12));
5362         WREG32(VM_CONTEXT1_CNTL2, 4);
5363         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5364                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5365                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5366                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5367                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5368                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5369                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5370                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5371                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5372                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5373                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5374                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5375                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5376
5377         if (rdev->family == CHIP_KAVERI) {
5378                 u32 tmp = RREG32(CHUB_CONTROL);
5379                 tmp &= ~BYPASS_VM;
5380                 WREG32(CHUB_CONTROL, tmp);
5381         }
5382
5383         /* XXX SH_MEM regs */
5384         /* where to put LDS, scratch, GPUVM in FSA64 space */
5385         mutex_lock(&rdev->srbm_mutex);
5386         for (i = 0; i < 16; i++) {
5387                 cik_srbm_select(rdev, 0, 0, 0, i);
5388                 /* CP and shaders */
5389                 WREG32(SH_MEM_CONFIG, 0);
5390                 WREG32(SH_MEM_APE1_BASE, 1);
5391                 WREG32(SH_MEM_APE1_LIMIT, 0);
5392                 WREG32(SH_MEM_BASES, 0);
5393                 /* SDMA GFX */
5394                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5395                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5396                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5397                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5398                 /* XXX SDMA RLC - todo */
5399         }
5400         cik_srbm_select(rdev, 0, 0, 0, 0);
5401         mutex_unlock(&rdev->srbm_mutex);
5402
5403         cik_pcie_gart_tlb_flush(rdev);
5404         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5405                  (unsigned)(rdev->mc.gtt_size >> 20),
5406                  (unsigned long long)rdev->gart.table_addr);
5407         rdev->gart.ready = true;
5408         return 0;
5409 }
5410
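/*
 * Editorial note (hedged): the per-VMID page table base registers form
 * two contiguous DWORD arrays, so the register for VMID n is
 * VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (n << 2) for n < 8 and
 * VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((n - 8) << 2) for n >= 8;
 * cik_vm_flush() repeats the same arithmetic when it emits the update
 * on a ring.
 */
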
5411 /**
5412  * cik_pcie_gart_disable - gart disable
5413  *
5414  * @rdev: radeon_device pointer
5415  *
5416  * This disables all VM page tables (CIK).
5417  */
5418 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5419 {
5420         /* Disable all tables */
5421         WREG32(VM_CONTEXT0_CNTL, 0);
5422         WREG32(VM_CONTEXT1_CNTL, 0);
5423         /* Setup TLB control */
5424         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5425                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5426         /* Setup L2 cache */
5427         WREG32(VM_L2_CNTL,
5428                ENABLE_L2_FRAGMENT_PROCESSING |
5429                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5430                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5431                EFFECTIVE_L2_QUEUE_SIZE(7) |
5432                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5433         WREG32(VM_L2_CNTL2, 0);
5434         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5435                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5436         radeon_gart_table_vram_unpin(rdev);
5437 }
5438
5439 /**
5440  * cik_pcie_gart_fini - vm fini callback
5441  *
5442  * @rdev: radeon_device pointer
5443  *
5444  * Tears down the driver GART/VM setup (CIK).
5445  */
5446 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5447 {
5448         cik_pcie_gart_disable(rdev);
5449         radeon_gart_table_vram_free(rdev);
5450         radeon_gart_fini(rdev);
5451 }
5452
5453 /* vm parser */
5454 /**
5455  * cik_ib_parse - vm ib_parse callback
5456  *
5457  * @rdev: radeon_device pointer
5458  * @ib: indirect buffer pointer
5459  *
5460  * CIK uses hw IB checking so this is a nop (CIK).
5461  */
5462 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5463 {
5464         return 0;
5465 }
5466
5467 /*
5468  * vm
5469  * VMID 0 is the physical GPU address space as used by the kernel.
5470  * VMIDs 1-15 are used for userspace clients and are handled
5471  * by the radeon vm/hsa code.
5472  */
5473 /**
5474  * cik_vm_init - cik vm init callback
5475  *
5476  * @rdev: radeon_device pointer
5477  *
5478  * Inits cik specific vm parameters (number of VMs, base of vram for
5479  * VMIDs 1-15) (CIK).
5480  * Returns 0 for success.
5481  */
5482 int cik_vm_init(struct radeon_device *rdev)
5483 {
5484         /* number of VMs */
5485         rdev->vm_manager.nvm = 16;
5486         /* base offset of vram pages */
5487         if (rdev->flags & RADEON_IS_IGP) {
5488                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5489                 tmp <<= 22;
5490                 rdev->vm_manager.vram_base_offset = tmp;
5491         } else
5492                 rdev->vm_manager.vram_base_offset = 0;
5493
5494         return 0;
5495 }
5496
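/*
 * Editorial note, a hedged worked example: the << 22 in cik_vm_init()
 * treats MC_VM_FB_OFFSET as a count of 4 MB units, so a register value
 * of 0x100 would give vram_base_offset = 0x100 << 22 = 0x40000000 (1 GB).
 */
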
5497 /**
5498  * cik_vm_fini - cik vm fini callback
5499  *
5500  * @rdev: radeon_device pointer
5501  *
5502  * Tear down any asic specific VM setup (CIK).
5503  */
5504 void cik_vm_fini(struct radeon_device *rdev)
5505 {
5506 }
5507
5508 /**
5509  * cik_vm_decode_fault - print human readable fault info
5510  *
5511  * @rdev: radeon_device pointer
5512  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5513  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5514  *
5515  * Print human readable fault information (CIK).
5516  */
5517 static void cik_vm_decode_fault(struct radeon_device *rdev,
5518                                 u32 status, u32 addr, u32 mc_client)
5519 {
5520         u32 mc_id;
5521         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5522         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5523         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5524                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5525
5526         if (rdev->family == CHIP_HAWAII)
5527                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5528         else
5529                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5530
5531         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5532                protections, vmid, addr,
5533                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5534                block, mc_client, mc_id);
5535 }
5536
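/*
 * Editorial note, a hedged worked example: mc_client is four ASCII bytes
 * packed big-endian into one register, so a value of 0x43423000 would
 * decode to the block string "CB0" in cik_vm_decode_fault() above
 * ('C' from bits 31:24, then 'B', '0', and a low byte of zero).
 */
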
5537 /**
5538  * cik_vm_flush - cik vm flush using the CP
5539  *
5540  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
5541  *
5542  * Update the page table base and flush the VM TLB
5543  * using the CP (CIK).
5544  */
5545 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5546 {
5547         struct radeon_ring *ring = &rdev->ring[ridx];
5548         int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
5549
5550         if (vm == NULL)
5551                 return;
5552
5553         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5554         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5555                                  WRITE_DATA_DST_SEL(0)));
5556         if (vm->id < 8) {
5557                 radeon_ring_write(ring,
5558                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5559         } else {
5560                 radeon_ring_write(ring,
5561                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5562         }
5563         radeon_ring_write(ring, 0);
5564         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5565
5566         /* update SH_MEM_* regs */
5567         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5568         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5569                                  WRITE_DATA_DST_SEL(0)));
5570         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5571         radeon_ring_write(ring, 0);
5572         radeon_ring_write(ring, VMID(vm->id));
5573
5574         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5575         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5576                                  WRITE_DATA_DST_SEL(0)));
5577         radeon_ring_write(ring, SH_MEM_BASES >> 2);
5578         radeon_ring_write(ring, 0);
5579
5580         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5581         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5582         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5583         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5584
5585         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5586         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5587                                  WRITE_DATA_DST_SEL(0)));
5588         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5589         radeon_ring_write(ring, 0);
5590         radeon_ring_write(ring, VMID(0));
5591
5592         /* HDP flush */
5593         cik_hdp_flush_cp_ring_emit(rdev, ridx);
5594
5595         /* bits 0-15 are the VM contexts 0-15 */
5596         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5597         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5598                                  WRITE_DATA_DST_SEL(0)));
5599         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5600         radeon_ring_write(ring, 0);
5601         radeon_ring_write(ring, 1 << vm->id);
5602
5603         /* compute doesn't have PFP */
5604         if (usepfp) {
5605                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5606                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5607                 radeon_ring_write(ring, 0x0);
5608         }
5609 }
5610
5611 /*
5612  * RLC
5613  * The RLC is a multi-purpose microengine that handles a
5614  * variety of functions, the most important of which is
5615  * the interrupt controller.
5616  */
5617 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5618                                           bool enable)
5619 {
5620         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5621
5622         if (enable)
5623                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5624         else
5625                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5626         WREG32(CP_INT_CNTL_RING0, tmp);
5627 }
5628
5629 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5630 {
5631         u32 tmp;
5632
5633         tmp = RREG32(RLC_LB_CNTL);
5634         if (enable)
5635                 tmp |= LOAD_BALANCE_ENABLE;
5636         else
5637                 tmp &= ~LOAD_BALANCE_ENABLE;
5638         WREG32(RLC_LB_CNTL, tmp);
5639 }
5640
5641 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5642 {
5643         u32 i, j, k;
5644         u32 mask;
5645
5646         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5647                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5648                         cik_select_se_sh(rdev, i, j);
5649                         for (k = 0; k < rdev->usec_timeout; k++) {
5650                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5651                                         break;
5652                                 udelay(1);
5653                         }
5654                 }
5655         }
5656         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5657
5658         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5659         for (k = 0; k < rdev->usec_timeout; k++) {
5660                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5661                         break;
5662                 udelay(1);
5663         }
5664 }
5665
5666 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5667 {
5668         u32 tmp;
5669
5670         tmp = RREG32(RLC_CNTL);
5671         if (tmp != rlc)
5672                 WREG32(RLC_CNTL, rlc);
5673 }
5674
5675 static u32 cik_halt_rlc(struct radeon_device *rdev)
5676 {
5677         u32 data, orig;
5678
5679         orig = data = RREG32(RLC_CNTL);
5680
5681         if (data & RLC_ENABLE) {
5682                 u32 i;
5683
5684                 data &= ~RLC_ENABLE;
5685                 WREG32(RLC_CNTL, data);
5686
5687                 for (i = 0; i < rdev->usec_timeout; i++) {
5688                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5689                                 break;
5690                         udelay(1);
5691                 }
5692
5693                 cik_wait_for_rlc_serdes(rdev);
5694         }
5695
5696         return orig;
5697 }
5698
5699 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5700 {
5701         u32 tmp, i, mask;
5702
5703         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5704         WREG32(RLC_GPR_REG2, tmp);
5705
5706         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5707         for (i = 0; i < rdev->usec_timeout; i++) {
5708                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5709                         break;
5710                 udelay(1);
5711         }
5712
5713         for (i = 0; i < rdev->usec_timeout; i++) {
5714                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5715                         break;
5716                 udelay(1);
5717         }
5718 }
5719
5720 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5721 {
5722         u32 tmp;
5723
5724         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5725         WREG32(RLC_GPR_REG2, tmp);
5726 }
5727
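/*
 * Editorial sketch, not part of the original driver: the safe-mode
 * helpers above are meant to bracket accesses to RLC-managed state while
 * the RLC is running.  A minimal, hypothetical caller (names assumed):
 */
static void __maybe_unused cik_poke_rlc_managed_reg_example(struct radeon_device *rdev,
							    u32 reg, u32 val)
{
	cik_enter_rlc_safe_mode(rdev);	/* request and wait for safe mode */
	WREG32(reg, val);		/* the register can be touched now */
	cik_exit_rlc_safe_mode(rdev);	/* let the RLC resume normal operation */
}
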
5728 /**
5729  * cik_rlc_stop - stop the RLC ME
5730  *
5731  * @rdev: radeon_device pointer
5732  *
5733  * Halt the RLC ME (MicroEngine) (CIK).
5734  */
5735 static void cik_rlc_stop(struct radeon_device *rdev)
5736 {
5737         WREG32(RLC_CNTL, 0);
5738
5739         cik_enable_gui_idle_interrupt(rdev, false);
5740
5741         cik_wait_for_rlc_serdes(rdev);
5742 }
5743
5744 /**
5745  * cik_rlc_start - start the RLC ME
5746  *
5747  * @rdev: radeon_device pointer
5748  *
5749  * Unhalt the RLC ME (MicroEngine) (CIK).
5750  */
5751 static void cik_rlc_start(struct radeon_device *rdev)
5752 {
5753         WREG32(RLC_CNTL, RLC_ENABLE);
5754
5755         cik_enable_gui_idle_interrupt(rdev, true);
5756
5757         udelay(50);
5758 }
5759
5760 /**
5761  * cik_rlc_resume - setup the RLC hw
5762  *
5763  * @rdev: radeon_device pointer
5764  *
5765  * Initialize the RLC registers, load the ucode,
5766  * and start the RLC (CIK).
5767  * Returns 0 for success, -EINVAL if the ucode is not available.
5768  */
5769 static int cik_rlc_resume(struct radeon_device *rdev)
5770 {
5771         u32 i, size, tmp;
5772         const __be32 *fw_data;
5773
5774         if (!rdev->rlc_fw)
5775                 return -EINVAL;
5776
5777         switch (rdev->family) {
5778         case CHIP_BONAIRE:
5779         case CHIP_HAWAII:
5780         default:
5781                 size = BONAIRE_RLC_UCODE_SIZE;
5782                 break;
5783         case CHIP_KAVERI:
5784                 size = KV_RLC_UCODE_SIZE;
5785                 break;
5786         case CHIP_KABINI:
5787                 size = KB_RLC_UCODE_SIZE;
5788                 break;
5789         }
5790
5791         cik_rlc_stop(rdev);
5792
5793         /* disable CG */
5794         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5795         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5796
5797         si_rlc_reset(rdev);
5798
5799         cik_init_pg(rdev);
5800
5801         cik_init_cg(rdev);
5802
5803         WREG32(RLC_LB_CNTR_INIT, 0);
5804         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5805
5806         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5807         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5808         WREG32(RLC_LB_PARAMS, 0x00600408);
5809         WREG32(RLC_LB_CNTL, 0x80000004);
5810
5811         WREG32(RLC_MC_CNTL, 0);
5812         WREG32(RLC_UCODE_CNTL, 0);
5813
5814         fw_data = (const __be32 *)rdev->rlc_fw->data;
5815         WREG32(RLC_GPM_UCODE_ADDR, 0);
5816         for (i = 0; i < size; i++)
5817                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5818         WREG32(RLC_GPM_UCODE_ADDR, 0);
5819
5820         /* XXX - find out what chips support lbpw */
5821         cik_enable_lbpw(rdev, false);
5822
5823         if (rdev->family == CHIP_BONAIRE)
5824                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5825
5826         cik_rlc_start(rdev);
5827
5828         return 0;
5829 }
5830
5831 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5832 {
5833         u32 data, orig, tmp, tmp2;
5834
5835         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5836
5837         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5838                 cik_enable_gui_idle_interrupt(rdev, true);
5839
5840                 tmp = cik_halt_rlc(rdev);
5841
5842                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5843                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5844                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5845                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5846                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5847
5848                 cik_update_rlc(rdev, tmp);
5849
5850                 data |= CGCG_EN | CGLS_EN;
5851         } else {
5852                 cik_enable_gui_idle_interrupt(rdev, false);
5853
5854                 RREG32(CB_CGTT_SCLK_CTRL);
5855                 RREG32(CB_CGTT_SCLK_CTRL);
5856                 RREG32(CB_CGTT_SCLK_CTRL);
5857                 RREG32(CB_CGTT_SCLK_CTRL);
5858
5859                 data &= ~(CGCG_EN | CGLS_EN);
5860         }
5861
5862         if (orig != data)
5863                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5864
5865 }
5866
5867 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5868 {
5869         u32 data, orig, tmp = 0;
5870
5871         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5872                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5873                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5874                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
5875                                 data |= CP_MEM_LS_EN;
5876                                 if (orig != data)
5877                                         WREG32(CP_MEM_SLP_CNTL, data);
5878                         }
5879                 }
5880
5881                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5882                 data &= 0xfffffffd;
5883                 if (orig != data)
5884                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5885
5886                 tmp = cik_halt_rlc(rdev);
5887
5888                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5889                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5890                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5891                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5892                 WREG32(RLC_SERDES_WR_CTRL, data);
5893
5894                 cik_update_rlc(rdev, tmp);
5895
5896                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5897                         orig = data = RREG32(CGTS_SM_CTRL_REG);
5898                         data &= ~SM_MODE_MASK;
5899                         data |= SM_MODE(0x2);
5900                         data |= SM_MODE_ENABLE;
5901                         data &= ~CGTS_OVERRIDE;
5902                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5903                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5904                                 data &= ~CGTS_LS_OVERRIDE;
5905                         data &= ~ON_MONITOR_ADD_MASK;
5906                         data |= ON_MONITOR_ADD_EN;
5907                         data |= ON_MONITOR_ADD(0x96);
5908                         if (orig != data)
5909                                 WREG32(CGTS_SM_CTRL_REG, data);
5910                 }
5911         } else {
5912                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5913                 data |= 0x00000002;
5914                 if (orig != data)
5915                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5916
5917                 data = RREG32(RLC_MEM_SLP_CNTL);
5918                 if (data & RLC_MEM_LS_EN) {
5919                         data &= ~RLC_MEM_LS_EN;
5920                         WREG32(RLC_MEM_SLP_CNTL, data);
5921                 }
5922
5923                 data = RREG32(CP_MEM_SLP_CNTL);
5924                 if (data & CP_MEM_LS_EN) {
5925                         data &= ~CP_MEM_LS_EN;
5926                         WREG32(CP_MEM_SLP_CNTL, data);
5927                 }
5928
5929                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5930                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5931                 if (orig != data)
5932                         WREG32(CGTS_SM_CTRL_REG, data);
5933
5934                 tmp = cik_halt_rlc(rdev);
5935
5936                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5937                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5938                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5939                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5940                 WREG32(RLC_SERDES_WR_CTRL, data);
5941
5942                 cik_update_rlc(rdev, tmp);
5943         }
5944 }
5945
5946 static const u32 mc_cg_registers[] =
5947 {
5948         MC_HUB_MISC_HUB_CG,
5949         MC_HUB_MISC_SIP_CG,
5950         MC_HUB_MISC_VM_CG,
5951         MC_XPB_CLK_GAT,
5952         ATC_MISC_CG,
5953         MC_CITF_MISC_WR_CG,
5954         MC_CITF_MISC_RD_CG,
5955         MC_CITF_MISC_VM_CG,
5956         VM_L2_CG,
5957 };
5958
5959 static void cik_enable_mc_ls(struct radeon_device *rdev,
5960                              bool enable)
5961 {
5962         int i;
5963         u32 orig, data;
5964
5965         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5966                 orig = data = RREG32(mc_cg_registers[i]);
5967                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5968                         data |= MC_LS_ENABLE;
5969                 else
5970                         data &= ~MC_LS_ENABLE;
5971                 if (data != orig)
5972                         WREG32(mc_cg_registers[i], data);
5973         }
5974 }
5975
5976 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5977                                bool enable)
5978 {
5979         int i;
5980         u32 orig, data;
5981
5982         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5983                 orig = data = RREG32(mc_cg_registers[i]);
5984                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5985                         data |= MC_CG_ENABLE;
5986                 else
5987                         data &= ~MC_CG_ENABLE;
5988                 if (data != orig)
5989                         WREG32(mc_cg_registers[i], data);
5990         }
5991 }
5992
5993 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5994                                  bool enable)
5995 {
5996         u32 orig, data;
5997
5998         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5999                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6000                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6001         } else {
6002                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6003                 data |= 0xff000000;
6004                 if (data != orig)
6005                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6006
6007                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6008                 data |= 0xff000000;
6009                 if (data != orig)
6010                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6011         }
6012 }
6013
6014 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6015                                  bool enable)
6016 {
6017         u32 orig, data;
6018
6019         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6020                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6021                 data |= 0x100;
6022                 if (orig != data)
6023                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6024
6025                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6026                 data |= 0x100;
6027                 if (orig != data)
6028                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6029         } else {
6030                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6031                 data &= ~0x100;
6032                 if (orig != data)
6033                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6034
6035                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6036                 data &= ~0x100;
6037                 if (orig != data)
6038                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6039         }
6040 }
6041
6042 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6043                                 bool enable)
6044 {
6045         u32 orig, data;
6046
6047         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6048                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6049                 data = 0xfff;
6050                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6051
6052                 orig = data = RREG32(UVD_CGC_CTRL);
6053                 data |= DCM;
6054                 if (orig != data)
6055                         WREG32(UVD_CGC_CTRL, data);
6056         } else {
6057                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6058                 data &= ~0xfff;
6059                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6060
6061                 orig = data = RREG32(UVD_CGC_CTRL);
6062                 data &= ~DCM;
6063                 if (orig != data)
6064                         WREG32(UVD_CGC_CTRL, data);
6065         }
6066 }
6067
6068 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6069                                bool enable)
6070 {
6071         u32 orig, data;
6072
6073         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6074
6075         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6076                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6077                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6078         else
6079                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6080                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6081
6082         if (orig != data)
6083                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6084 }
6085
6086 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6087                                 bool enable)
6088 {
6089         u32 orig, data;
6090
6091         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6092
6093         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6094                 data &= ~CLOCK_GATING_DIS;
6095         else
6096                 data |= CLOCK_GATING_DIS;
6097
6098         if (orig != data)
6099                 WREG32(HDP_HOST_PATH_CNTL, data);
6100 }
6101
6102 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6103                               bool enable)
6104 {
6105         u32 orig, data;
6106
6107         orig = data = RREG32(HDP_MEM_POWER_LS);
6108
6109         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6110                 data |= HDP_LS_ENABLE;
6111         else
6112                 data &= ~HDP_LS_ENABLE;
6113
6114         if (orig != data)
6115                 WREG32(HDP_MEM_POWER_LS, data);
6116 }
6117
6118 void cik_update_cg(struct radeon_device *rdev,
6119                    u32 block, bool enable)
6120 {
6121
6122         if (block & RADEON_CG_BLOCK_GFX) {
6123                 cik_enable_gui_idle_interrupt(rdev, false);
6124                 /* order matters! */
6125                 if (enable) {
6126                         cik_enable_mgcg(rdev, true);
6127                         cik_enable_cgcg(rdev, true);
6128                 } else {
6129                         cik_enable_cgcg(rdev, false);
6130                         cik_enable_mgcg(rdev, false);
6131                 }
6132                 cik_enable_gui_idle_interrupt(rdev, true);
6133         }
6134
6135         if (block & RADEON_CG_BLOCK_MC) {
6136                 if (!(rdev->flags & RADEON_IS_IGP)) {
6137                         cik_enable_mc_mgcg(rdev, enable);
6138                         cik_enable_mc_ls(rdev, enable);
6139                 }
6140         }
6141
6142         if (block & RADEON_CG_BLOCK_SDMA) {
6143                 cik_enable_sdma_mgcg(rdev, enable);
6144                 cik_enable_sdma_mgls(rdev, enable);
6145         }
6146
6147         if (block & RADEON_CG_BLOCK_BIF) {
6148                 cik_enable_bif_mgls(rdev, enable);
6149         }
6150
6151         if (block & RADEON_CG_BLOCK_UVD) {
6152                 if (rdev->has_uvd)
6153                         cik_enable_uvd_mgcg(rdev, enable);
6154         }
6155
6156         if (block & RADEON_CG_BLOCK_HDP) {
6157                 cik_enable_hdp_mgcg(rdev, enable);
6158                 cik_enable_hdp_ls(rdev, enable);
6159         }
6160 }
6161
6162 static void cik_init_cg(struct radeon_device *rdev)
6163 {
6164
6165         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6166
6167         if (rdev->has_uvd)
6168                 si_init_uvd_internal_cg(rdev);
6169
6170         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6171                              RADEON_CG_BLOCK_SDMA |
6172                              RADEON_CG_BLOCK_BIF |
6173                              RADEON_CG_BLOCK_UVD |
6174                              RADEON_CG_BLOCK_HDP), true);
6175 }
6176
6177 static void cik_fini_cg(struct radeon_device *rdev)
6178 {
6179         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6180                              RADEON_CG_BLOCK_SDMA |
6181                              RADEON_CG_BLOCK_BIF |
6182                              RADEON_CG_BLOCK_UVD |
6183                              RADEON_CG_BLOCK_HDP), false);
6184
6185         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6186 }
6187
6188 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6189                                           bool enable)
6190 {
6191         u32 data, orig;
6192
6193         orig = data = RREG32(RLC_PG_CNTL);
6194         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6195                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6196         else
6197                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6198         if (orig != data)
6199                 WREG32(RLC_PG_CNTL, data);
6200 }
6201
6202 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6203                                           bool enable)
6204 {
6205         u32 data, orig;
6206
6207         orig = data = RREG32(RLC_PG_CNTL);
6208         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6209                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6210         else
6211                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6212         if (orig != data)
6213                 WREG32(RLC_PG_CNTL, data);
6214 }
6215
6216 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6217 {
6218         u32 data, orig;
6219
6220         orig = data = RREG32(RLC_PG_CNTL);
6221         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6222                 data &= ~DISABLE_CP_PG;
6223         else
6224                 data |= DISABLE_CP_PG;
6225         if (orig != data)
6226                 WREG32(RLC_PG_CNTL, data);
6227 }
6228
6229 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6230 {
6231         u32 data, orig;
6232
6233         orig = data = RREG32(RLC_PG_CNTL);
6234         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6235                 data &= ~DISABLE_GDS_PG;
6236         else
6237                 data |= DISABLE_GDS_PG;
6238         if (orig != data)
6239                 WREG32(RLC_PG_CNTL, data);
6240 }
6241
6242 #define CP_ME_TABLE_SIZE    96
6243 #define CP_ME_TABLE_OFFSET  2048
6244 #define CP_MEC_TABLE_OFFSET 4096
6245
6246 void cik_init_cp_pg_table(struct radeon_device *rdev)
6247 {
6248         const __be32 *fw_data;
6249         volatile u32 *dst_ptr;
6250         int me, i, max_me = 4;
6251         u32 bo_offset = 0;
6252         u32 table_offset;
6253
6254         if (rdev->family == CHIP_KAVERI)
6255                 max_me = 5;
6256
6257         if (rdev->rlc.cp_table_ptr == NULL)
6258                 return;
6259
6260         /* write the cp table buffer */
6261         dst_ptr = rdev->rlc.cp_table_ptr;
6262         for (me = 0; me < max_me; me++) {
6263                 if (me == 0) {
6264                         fw_data = (const __be32 *)rdev->ce_fw->data;
6265                         table_offset = CP_ME_TABLE_OFFSET;
6266                 } else if (me == 1) {
6267                         fw_data = (const __be32 *)rdev->pfp_fw->data;
6268                         table_offset = CP_ME_TABLE_OFFSET;
6269                 } else if (me == 2) {
6270                         fw_data = (const __be32 *)rdev->me_fw->data;
6271                         table_offset = CP_ME_TABLE_OFFSET;
6272                 } else {
6273                         fw_data = (const __be32 *)rdev->mec_fw->data;
6274                         table_offset = CP_MEC_TABLE_OFFSET;
6275                 }
6276
6277                 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
6278                         dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6279                 }
6280                 bo_offset += CP_ME_TABLE_SIZE;
6281         }
6282 }
6283
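/*
 * Editorial note, a hedged worked example: each micro engine contributes
 * CP_ME_TABLE_SIZE (96) DWORDs to the CP power-gating table, so bo_offset
 * advances by 96 per engine: 4 * 96 = 384 DWORDs with max_me = 4, and
 * 5 * 96 = 480 DWORDs on Kaveri where max_me = 5.
 */
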
6284 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6285                                 bool enable)
6286 {
6287         u32 data, orig;
6288
6289         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6290                 orig = data = RREG32(RLC_PG_CNTL);
6291                 data |= GFX_PG_ENABLE;
6292                 if (orig != data)
6293                         WREG32(RLC_PG_CNTL, data);
6294
6295                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6296                 data |= AUTO_PG_EN;
6297                 if (orig != data)
6298                         WREG32(RLC_AUTO_PG_CTRL, data);
6299         } else {
6300                 orig = data = RREG32(RLC_PG_CNTL);
6301                 data &= ~GFX_PG_ENABLE;
6302                 if (orig != data)
6303                         WREG32(RLC_PG_CNTL, data);
6304
6305                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6306                 data &= ~AUTO_PG_EN;
6307                 if (orig != data)
6308                         WREG32(RLC_AUTO_PG_CTRL, data);
6309
6310                 data = RREG32(DB_RENDER_CONTROL);
6311         }
6312 }
6313
6314 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6315 {
6316         u32 mask = 0, tmp, tmp1;
6317         int i;
6318
6319         cik_select_se_sh(rdev, se, sh);
6320         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6321         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6322         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6323
6324         tmp &= 0xffff0000;
6325
6326         tmp |= tmp1;
6327         tmp >>= 16;
6328
6329         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6330                 mask <<= 1;
6331                 mask |= 1;
6332         }
6333
6334         return (~tmp) & mask;
6335 }
6336
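/*
 * Editorial note, a hedged worked example: the two *_SHADER_ARRAY_CONFIG
 * registers carry the inactive-CU bits in their upper half.  With
 * max_cu_per_sh = 8 the loop above builds mask = 0xff, and if the
 * combined value after the >> 16 is 0x03 (CUs 0 and 1 disabled) the
 * function returns ~0x03 & 0xff = 0xfc, i.e. CUs 2-7 active.
 */
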
6337 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6338 {
6339         u32 i, j, k, active_cu_number = 0;
6340         u32 mask, counter, cu_bitmap;
6341         u32 tmp = 0;
6342
6343         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6344                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6345                         mask = 1;
6346                         cu_bitmap = 0;
6347                         counter = 0;
6348                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6349                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6350                                         if (counter < 2)
6351                                                 cu_bitmap |= mask;
6352                                         counter++;
6353                                 }
6354                                 mask <<= 1;
6355                         }
6356
6357                         active_cu_number += counter;
6358                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6359                 }
6360         }
6361
6362         WREG32(RLC_PG_AO_CU_MASK, tmp);
6363
6364         tmp = RREG32(RLC_MAX_PG_CU);
6365         tmp &= ~MAX_PU_CU_MASK;
6366         tmp |= MAX_PU_CU(active_cu_number);
6367         WREG32(RLC_MAX_PG_CU, tmp);
6368 }
6369
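/**
 * cik_enable_gfx_static_mgpg - enable/disable static medium grain power gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the feature
 *
 * Toggle static per-CU power gating in the RLC (CIK).
 */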
6370 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6371                                        bool enable)
6372 {
6373         u32 data, orig;
6374
6375         orig = data = RREG32(RLC_PG_CNTL);
6376         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6377                 data |= STATIC_PER_CU_PG_ENABLE;
6378         else
6379                 data &= ~STATIC_PER_CU_PG_ENABLE;
6380         if (orig != data)
6381                 WREG32(RLC_PG_CNTL, data);
6382 }
6383
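/**
 * cik_enable_gfx_dynamic_mgpg - enable/disable dynamic medium grain power gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the feature
 *
 * Toggle dynamic per-CU power gating in the RLC (CIK).
 */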
6384 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6385                                         bool enable)
6386 {
6387         u32 data, orig;
6388
6389         orig = data = RREG32(RLC_PG_CNTL);
6390         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6391                 data |= DYN_PER_CU_PG_ENABLE;
6392         else
6393                 data &= ~DYN_PER_CU_PG_ENABLE;
6394         if (orig != data)
6395                 WREG32(RLC_PG_CNTL, data);
6396 }
6397
6398 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6399 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6400
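/**
 * cik_init_gfx_cgpg - set up the RLC for gfx power gating
 *
 * @rdev: radeon_device pointer
 *
 * Program the clear state descriptor and save/restore list into the
 * RLC scratch area, set the save/restore and CP table base addresses,
 * and configure the idle poll count and power gating delays (CIK).
 */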
6401 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6402 {
6403         u32 data, orig;
6404         u32 i;
6405
6406         if (rdev->rlc.cs_data) {
6407                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6408                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6409                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6410                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6411         } else {
6412                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6413                 for (i = 0; i < 3; i++)
6414                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
6415         }
6416         if (rdev->rlc.reg_list) {
6417                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6418                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6419                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6420         }
6421
6422         orig = data = RREG32(RLC_PG_CNTL);
6423         data |= GFX_PG_SRC;
6424         if (orig != data)
6425                 WREG32(RLC_PG_CNTL, data);
6426
6427         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6428         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6429
6430         data = RREG32(CP_RB_WPTR_POLL_CNTL);
6431         data &= ~IDLE_POLL_COUNT_MASK;
6432         data |= IDLE_POLL_COUNT(0x60);
6433         WREG32(CP_RB_WPTR_POLL_CNTL, data);
6434
6435         data = 0x10101010;
6436         WREG32(RLC_PG_DELAY, data);
6437
6438         data = RREG32(RLC_PG_DELAY_2);
6439         data &= ~0xff;
6440         data |= 0x3;
6441         WREG32(RLC_PG_DELAY_2, data);
6442
6443         data = RREG32(RLC_AUTO_PG_CTRL);
6444         data &= ~GRBM_REG_SGIT_MASK;
6445         data |= GRBM_REG_SGIT(0x700);
6446         WREG32(RLC_AUTO_PG_CTRL, data);
6447
6448 }
6449
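/**
 * cik_update_gfx_pg - enable/disable gfx power gating features
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable power gating
 *
 * Update coarse grain, static and dynamic medium grain gfx power
 * gating together (CIK).
 */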
6450 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6451 {
6452         cik_enable_gfx_cgpg(rdev, enable);
6453         cik_enable_gfx_static_mgpg(rdev, enable);
6454         cik_enable_gfx_dynamic_mgpg(rdev, enable);
6455 }
6456
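/**
 * cik_get_csb_size - calculate the clear state buffer size
 *
 * @rdev: radeon_device pointer
 *
 * Walk the clear state sections and return the number of dwords
 * needed for the clear state buffer, or 0 on error (CIK).
 */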
6457 u32 cik_get_csb_size(struct radeon_device *rdev)
6458 {
6459         u32 count = 0;
6460         const struct cs_section_def *sect = NULL;
6461         const struct cs_extent_def *ext = NULL;
6462
6463         if (rdev->rlc.cs_data == NULL)
6464                 return 0;
6465
6466         /* begin clear state */
6467         count += 2;
6468         /* context control state */
6469         count += 3;
6470
6471         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6472                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6473                         if (sect->id == SECT_CONTEXT)
6474                                 count += 2 + ext->reg_count;
6475                         else
6476                                 return 0;
6477                 }
6478         }
6479         /* pa_sc_raster_config/pa_sc_raster_config1 */
6480         count += 4;
6481         /* end clear state */
6482         count += 2;
6483         /* clear state */
6484         count += 2;
6485
6486         return count;
6487 }
6488
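/**
 * cik_get_csb_buffer - fill the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: pointer to the clear state buffer to fill
 *
 * Emit the PM4 packets (preamble, context control, context registers
 * and raster config) that make up the clear state buffer (CIK).
 */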
6489 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6490 {
6491         u32 count = 0, i;
6492         const struct cs_section_def *sect = NULL;
6493         const struct cs_extent_def *ext = NULL;
6494
6495         if (rdev->rlc.cs_data == NULL)
6496                 return;
6497         if (buffer == NULL)
6498                 return;
6499
6500         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6501         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6502
6503         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6504         buffer[count++] = cpu_to_le32(0x80000000);
6505         buffer[count++] = cpu_to_le32(0x80000000);
6506
6507         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6508                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6509                         if (sect->id == SECT_CONTEXT) {
6510                                 buffer[count++] =
6511                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6512                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6513                                 for (i = 0; i < ext->reg_count; i++)
6514                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6515                         } else {
6516                                 return;
6517                         }
6518                 }
6519         }
6520
6521         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6522         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6523         switch (rdev->family) {
6524         case CHIP_BONAIRE:
6525                 buffer[count++] = cpu_to_le32(0x16000012);
6526                 buffer[count++] = cpu_to_le32(0x00000000);
6527                 break;
6528         case CHIP_KAVERI:
6529                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6530                 buffer[count++] = cpu_to_le32(0x00000000);
6531                 break;
6532         case CHIP_KABINI:
6533                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6534                 buffer[count++] = cpu_to_le32(0x00000000);
6535                 break;
6536         case CHIP_HAWAII:
6537                 buffer[count++] = cpu_to_le32(0x3a00161a);
6538                 buffer[count++] = cpu_to_le32(0x0000002e);
6539                 break;
6540         default:
6541                 buffer[count++] = cpu_to_le32(0x00000000);
6542                 buffer[count++] = cpu_to_le32(0x00000000);
6543                 break;
6544         }
6545
6546         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6547         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6548
6549         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6550         buffer[count++] = cpu_to_le32(0);
6551 }
6552
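/**
 * cik_init_pg - initialize power gating
 *
 * @rdev: radeon_device pointer
 *
 * Enable SCK slowdown, set up gfx/CP/GDS power gating if supported,
 * program the always-on CU mask and enable gfx power gating (CIK).
 */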
6553 static void cik_init_pg(struct radeon_device *rdev)
6554 {
6555         if (rdev->pg_flags) {
6556                 cik_enable_sck_slowdown_on_pu(rdev, true);
6557                 cik_enable_sck_slowdown_on_pd(rdev, true);
6558                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6559                         cik_init_gfx_cgpg(rdev);
6560                         cik_enable_cp_pg(rdev, true);
6561                         cik_enable_gds_pg(rdev, true);
6562                 }
6563                 cik_init_ao_cu_mask(rdev);
6564                 cik_update_gfx_pg(rdev, true);
6565         }
6566 }
6567
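/**
 * cik_fini_pg - tear down power gating
 *
 * @rdev: radeon_device pointer
 *
 * Disable gfx power gating and, if supported, CP and GDS power
 * gating (CIK).
 */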
6568 static void cik_fini_pg(struct radeon_device *rdev)
6569 {
6570         if (rdev->pg_flags) {
6571                 cik_update_gfx_pg(rdev, false);
6572                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6573                         cik_enable_cp_pg(rdev, false);
6574                         cik_enable_gds_pg(rdev, false);
6575                 }
6576         }
6577 }
6578
6579 /*
6580  * Interrupts
6581  * Starting with r6xx, interrupts are handled via a ring buffer.
6582  * Ring buffers are areas of GPU accessible memory that the GPU
6583  * writes interrupt vectors into and the host reads vectors out of.
6584  * There is a rptr (read pointer) that determines where the
6585  * host is currently reading, and a wptr (write pointer)
6586  * which determines where the GPU has written.  When the
6587  * pointers are equal, the ring is idle.  When the GPU
6588  * writes vectors to the ring buffer, it increments the
6589  * wptr.  When there is an interrupt, the host then starts
6590  * fetching vectors and processing them until the pointers are
6591  * equal again, at which point it updates the rptr.
6592  */
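
/*
 * A minimal sketch of the host-side loop described above; this is not
 * driver code, decode_iv_entry() is a hypothetical stand-in, and the
 * real implementation is cik_irq_process() below:
 *
 *	wptr = cik_get_ih_wptr(rdev);
 *	rptr = rdev->ih.rptr;
 *	while (rptr != wptr) {
 *		decode_iv_entry(&rdev->ih.ring[rptr / 4]);
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 *
 * Each IV entry is 16 bytes, so the rptr advances by 16 and wraps via
 * the ring's ptr_mask.
 */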
6593
6594 /**
6595  * cik_enable_interrupts - Enable the interrupt ring buffer
6596  *
6597  * @rdev: radeon_device pointer
6598  *
6599  * Enable the interrupt ring buffer (CIK).
6600  */
6601 static void cik_enable_interrupts(struct radeon_device *rdev)
6602 {
6603         u32 ih_cntl = RREG32(IH_CNTL);
6604         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6605
6606         ih_cntl |= ENABLE_INTR;
6607         ih_rb_cntl |= IH_RB_ENABLE;
6608         WREG32(IH_CNTL, ih_cntl);
6609         WREG32(IH_RB_CNTL, ih_rb_cntl);
6610         rdev->ih.enabled = true;
6611 }
6612
6613 /**
6614  * cik_disable_interrupts - Disable the interrupt ring buffer
6615  *
6616  * @rdev: radeon_device pointer
6617  *
6618  * Disable the interrupt ring buffer (CIK).
6619  */
6620 static void cik_disable_interrupts(struct radeon_device *rdev)
6621 {
6622         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6623         u32 ih_cntl = RREG32(IH_CNTL);
6624
6625         ih_rb_cntl &= ~IH_RB_ENABLE;
6626         ih_cntl &= ~ENABLE_INTR;
6627         WREG32(IH_RB_CNTL, ih_rb_cntl);
6628         WREG32(IH_CNTL, ih_cntl);
6629         /* set rptr, wptr to 0 */
6630         WREG32(IH_RB_RPTR, 0);
6631         WREG32(IH_RB_WPTR, 0);
6632         rdev->ih.enabled = false;
6633         rdev->ih.rptr = 0;
6634 }
6635
6636 /**
6637  * cik_disable_interrupt_state - Disable all interrupt sources
6638  *
6639  * @rdev: radeon_device pointer
6640  *
6641  * Clear all interrupt enable bits used by the driver (CIK).
6642  */
6643 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6644 {
6645         u32 tmp;
6646
6647         /* gfx ring */
6648         tmp = RREG32(CP_INT_CNTL_RING0) &
6649                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6650         WREG32(CP_INT_CNTL_RING0, tmp);
6651         /* sdma */
6652         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6653         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6654         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6655         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6656         /* compute queues */
6657         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6658         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6659         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6660         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6661         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6662         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6663         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6664         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6665         /* grbm */
6666         WREG32(GRBM_INT_CNTL, 0);
6667         /* vline/vblank, etc. */
6668         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6669         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6670         if (rdev->num_crtc >= 4) {
6671                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6672                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6673         }
6674         if (rdev->num_crtc >= 6) {
6675                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6676                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6677         }
6678         /* pflip */
6679         if (rdev->num_crtc >= 2) {
6680                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6681                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6682         }
6683         if (rdev->num_crtc >= 4) {
6684                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6685                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6686         }
6687         if (rdev->num_crtc >= 6) {
6688                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6689                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6690         }
6691
6692         /* dac hotplug */
6693         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6694
6695         /* digital hotplug */
6696         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6697         WREG32(DC_HPD1_INT_CONTROL, tmp);
6698         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6699         WREG32(DC_HPD2_INT_CONTROL, tmp);
6700         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6701         WREG32(DC_HPD3_INT_CONTROL, tmp);
6702         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6703         WREG32(DC_HPD4_INT_CONTROL, tmp);
6704         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6705         WREG32(DC_HPD5_INT_CONTROL, tmp);
6706         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6707         WREG32(DC_HPD6_INT_CONTROL, tmp);
6708
6709 }
6710
6711 /**
6712  * cik_irq_init - init and enable the interrupt ring
6713  *
6714  * @rdev: radeon_device pointer
6715  *
6716  * Allocate a ring buffer for the interrupt controller,
6717  * enable the RLC, disable interrupts, enable the IH
6718  * ring buffer and enable it (CIK).
6719  * Called at device load and resume.
6720  * Returns 0 for success, errors for failure.
6721  */
6722 static int cik_irq_init(struct radeon_device *rdev)
6723 {
6724         int ret = 0;
6725         int rb_bufsz;
6726         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6727
6728         /* allocate ring */
6729         ret = r600_ih_ring_alloc(rdev);
6730         if (ret)
6731                 return ret;
6732
6733         /* disable irqs */
6734         cik_disable_interrupts(rdev);
6735
6736         /* init rlc */
6737         ret = cik_rlc_resume(rdev);
6738         if (ret) {
6739                 r600_ih_ring_fini(rdev);
6740                 return ret;
6741         }
6742
6743         /* setup interrupt control */
6744         /* XXX this should actually be a bus address, not an MC address. same on older asics */
6745         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6746         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6747         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6748          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6749          */
6750         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6751         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6752         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6753         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6754
6755         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6756         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6757
6758         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6759                       IH_WPTR_OVERFLOW_CLEAR |
6760                       (rb_bufsz << 1));
6761
6762         if (rdev->wb.enabled)
6763                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6764
6765         /* set the writeback address whether it's enabled or not */
6766         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6767         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6768
6769         WREG32(IH_RB_CNTL, ih_rb_cntl);
6770
6771         /* set rptr, wptr to 0 */
6772         WREG32(IH_RB_RPTR, 0);
6773         WREG32(IH_RB_WPTR, 0);
6774
6775         /* Default settings for IH_CNTL (disabled at first) */
6776         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6777         /* RPTR_REARM only works if msi's are enabled */
6778         if (rdev->msi_enabled)
6779                 ih_cntl |= RPTR_REARM;
6780         WREG32(IH_CNTL, ih_cntl);
6781
6782         /* force the active interrupt state to all disabled */
6783         cik_disable_interrupt_state(rdev);
6784
6785         pci_set_master(rdev->pdev);
6786
6787         /* enable irqs */
6788         cik_enable_interrupts(rdev);
6789
6790         return ret;
6791 }
6792
6793 /**
6794  * cik_irq_set - enable/disable interrupt sources
6795  *
6796  * @rdev: radeon_device pointer
6797  *
6798  * Enable interrupt sources on the GPU (vblanks, hpd,
6799  * etc.) (CIK).
6800  * Returns 0 for success, errors for failure.
6801  */
6802 int cik_irq_set(struct radeon_device *rdev)
6803 {
6804         u32 cp_int_cntl;
6805         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6806         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6807         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6808         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6809         u32 grbm_int_cntl = 0;
6810         u32 dma_cntl, dma_cntl1;
6811         u32 thermal_int;
6812
6813         if (!rdev->irq.installed) {
6814                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6815                 return -EINVAL;
6816         }
6817         /* don't enable anything if the ih is disabled */
6818         if (!rdev->ih.enabled) {
6819                 cik_disable_interrupts(rdev);
6820                 /* force the active interrupt state to all disabled */
6821                 cik_disable_interrupt_state(rdev);
6822                 return 0;
6823         }
6824
6825         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6826                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6827         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6828
6829         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6830         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6831         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6832         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6833         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6834         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6835
6836         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6837         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6838
6839         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6840         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6841         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6842         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6843         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6844         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6845         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6846         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6847
6848         if (rdev->flags & RADEON_IS_IGP)
6849                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6850                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6851         else
6852                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6853                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6854
6855         /* enable CP interrupts on all rings */
6856         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6857                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6858                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6859         }
6860         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6861                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6862                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6863                 if (ring->me == 1) {
6864                         switch (ring->pipe) {
6865                         case 0:
6866                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6867                                 break;
6868                         case 1:
6869                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6870                                 break;
6871                         case 2:
6872                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6873                                 break;
6874                         case 3:
6875                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6876                                 break;
6877                         default:
6878                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6879                                 break;
6880                         }
6881                 } else if (ring->me == 2) {
6882                         switch (ring->pipe) {
6883                         case 0:
6884                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6885                                 break;
6886                         case 1:
6887                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6888                                 break;
6889                         case 2:
6890                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6891                                 break;
6892                         case 3:
6893                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6894                                 break;
6895                         default:
6896                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6897                                 break;
6898                         }
6899                 } else {
6900                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6901                 }
6902         }
6903         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6904                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6905                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6906                 if (ring->me == 1) {
6907                         switch (ring->pipe) {
6908                         case 0:
6909                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6910                                 break;
6911                         case 1:
6912                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6913                                 break;
6914                         case 2:
6915                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6916                                 break;
6917                         case 3:
6918                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6919                                 break;
6920                         default:
6921                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6922                                 break;
6923                         }
6924                 } else if (ring->me == 2) {
6925                         switch (ring->pipe) {
6926                         case 0:
6927                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6928                                 break;
6929                         case 1:
6930                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6931                                 break;
6932                         case 2:
6933                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6934                                 break;
6935                         case 3:
6936                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6937                                 break;
6938                         default:
6939                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6940                                 break;
6941                         }
6942                 } else {
6943                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6944                 }
6945         }
6946
6947         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6948                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6949                 dma_cntl |= TRAP_ENABLE;
6950         }
6951
6952         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6953                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6954                 dma_cntl1 |= TRAP_ENABLE;
6955         }
6956
6957         if (rdev->irq.crtc_vblank_int[0] ||
6958             atomic_read(&rdev->irq.pflip[0])) {
6959                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6960                 crtc1 |= VBLANK_INTERRUPT_MASK;
6961         }
6962         if (rdev->irq.crtc_vblank_int[1] ||
6963             atomic_read(&rdev->irq.pflip[1])) {
6964                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6965                 crtc2 |= VBLANK_INTERRUPT_MASK;
6966         }
6967         if (rdev->irq.crtc_vblank_int[2] ||
6968             atomic_read(&rdev->irq.pflip[2])) {
6969                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6970                 crtc3 |= VBLANK_INTERRUPT_MASK;
6971         }
6972         if (rdev->irq.crtc_vblank_int[3] ||
6973             atomic_read(&rdev->irq.pflip[3])) {
6974                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6975                 crtc4 |= VBLANK_INTERRUPT_MASK;
6976         }
6977         if (rdev->irq.crtc_vblank_int[4] ||
6978             atomic_read(&rdev->irq.pflip[4])) {
6979                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6980                 crtc5 |= VBLANK_INTERRUPT_MASK;
6981         }
6982         if (rdev->irq.crtc_vblank_int[5] ||
6983             atomic_read(&rdev->irq.pflip[5])) {
6984                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6985                 crtc6 |= VBLANK_INTERRUPT_MASK;
6986         }
6987         if (rdev->irq.hpd[0]) {
6988                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6989                 hpd1 |= DC_HPDx_INT_EN;
6990         }
6991         if (rdev->irq.hpd[1]) {
6992                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6993                 hpd2 |= DC_HPDx_INT_EN;
6994         }
6995         if (rdev->irq.hpd[2]) {
6996                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6997                 hpd3 |= DC_HPDx_INT_EN;
6998         }
6999         if (rdev->irq.hpd[3]) {
7000                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7001                 hpd4 |= DC_HPDx_INT_EN;
7002         }
7003         if (rdev->irq.hpd[4]) {
7004                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7005                 hpd5 |= DC_HPDx_INT_EN;
7006         }
7007         if (rdev->irq.hpd[5]) {
7008                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7009                 hpd6 |= DC_HPDx_INT_EN;
7010         }
7011
7012         if (rdev->irq.dpm_thermal) {
7013                 DRM_DEBUG("dpm thermal\n");
7014                 if (rdev->flags & RADEON_IS_IGP)
7015                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7016                 else
7017                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7018         }
7019
7020         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7021
7022         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7023         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7024
7025         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7026         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7027         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7028         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7029         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7030         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7031         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7032         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7033
7034         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7035
7036         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7037         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7038         if (rdev->num_crtc >= 4) {
7039                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7040                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7041         }
7042         if (rdev->num_crtc >= 6) {
7043                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7044                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7045         }
7046
7047         if (rdev->num_crtc >= 2) {
7048                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7049                        GRPH_PFLIP_INT_MASK);
7050                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7051                        GRPH_PFLIP_INT_MASK);
7052         }
7053         if (rdev->num_crtc >= 4) {
7054                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7055                        GRPH_PFLIP_INT_MASK);
7056                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7057                        GRPH_PFLIP_INT_MASK);
7058         }
7059         if (rdev->num_crtc >= 6) {
7060                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7061                        GRPH_PFLIP_INT_MASK);
7062                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7063                        GRPH_PFLIP_INT_MASK);
7064         }
7065
7066         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7067         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7068         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7069         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7070         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7071         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7072
7073         if (rdev->flags & RADEON_IS_IGP)
7074                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7075         else
7076                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7077
7078         return 0;
7079 }
7080
7081 /**
7082  * cik_irq_ack - ack interrupt sources
7083  *
7084  * @rdev: radeon_device pointer
7085  *
7086  * Ack interrupt sources on the GPU (vblanks, hpd,
7087  * etc.) (CIK).  Certain interrupt sources are sw
7088  * generated and do not require an explicit ack.
7089  */
7090 static inline void cik_irq_ack(struct radeon_device *rdev)
7091 {
7092         u32 tmp;
7093
7094         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7095         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7096         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7097         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7098         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7099         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7100         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7101
7102         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7103                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7104         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7105                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7106         if (rdev->num_crtc >= 4) {
7107                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7108                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7109                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7110                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7111         }
7112         if (rdev->num_crtc >= 6) {
7113                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7114                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7115                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7116                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7117         }
7118
7119         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7120                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7121                        GRPH_PFLIP_INT_CLEAR);
7122         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7123                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7124                        GRPH_PFLIP_INT_CLEAR);
7125         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7126                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7127         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7128                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7129         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7130                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7131         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7132                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7133
7134         if (rdev->num_crtc >= 4) {
7135                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7136                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7137                                GRPH_PFLIP_INT_CLEAR);
7138                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7139                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7140                                GRPH_PFLIP_INT_CLEAR);
7141                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7142                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7143                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7144                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7145                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7146                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7147                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7148                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7149         }
7150
7151         if (rdev->num_crtc >= 6) {
7152                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7153                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7154                                GRPH_PFLIP_INT_CLEAR);
7155                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7156                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7157                                GRPH_PFLIP_INT_CLEAR);
7158                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7159                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7160                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7161                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7162                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7163                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7164                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7165                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7166         }
7167
7168         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7169                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7170                 tmp |= DC_HPDx_INT_ACK;
7171                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7172         }
7173         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7174                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7175                 tmp |= DC_HPDx_INT_ACK;
7176                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7177         }
7178         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7179                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7180                 tmp |= DC_HPDx_INT_ACK;
7181                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7182         }
7183         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7184                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7185                 tmp |= DC_HPDx_INT_ACK;
7186                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7187         }
7188         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7189                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7190                 tmp |= DC_HPDx_INT_ACK;
7191                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7192         }
7193         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7194                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7195                 tmp |= DC_HPDx_INT_ACK;
7196                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7197         }
7198 }
7199
7200 /**
7201  * cik_irq_disable - disable interrupts
7202  *
7203  * @rdev: radeon_device pointer
7204  *
7205  * Disable interrupts on the hw (CIK).
7206  */
7207 static void cik_irq_disable(struct radeon_device *rdev)
7208 {
7209         cik_disable_interrupts(rdev);
7210         /* Wait and acknowledge irq */
7211         mdelay(1);
7212         cik_irq_ack(rdev);
7213         cik_disable_interrupt_state(rdev);
7214 }
7215
7216 /**
7217  * cik_irq_suspend - disable interrupts for suspend
7218  *
7219  * @rdev: radeon_device pointer
7220  *
7221  * Disable interrupts and stop the RLC (CIK).
7222  * Used for suspend.
7223  */
7224 static void cik_irq_suspend(struct radeon_device *rdev)
7225 {
7226         cik_irq_disable(rdev);
7227         cik_rlc_stop(rdev);
7228 }
7229
7230 /**
7231  * cik_irq_fini - tear down interrupt support
7232  *
7233  * @rdev: radeon_device pointer
7234  *
7235  * Disable interrupts on the hw and free the IH ring
7236  * buffer (CIK).
7237  * Used for driver unload.
7238  */
7239 static void cik_irq_fini(struct radeon_device *rdev)
7240 {
7241         cik_irq_suspend(rdev);
7242         r600_ih_ring_fini(rdev);
7243 }
7244
7245 /**
7246  * cik_get_ih_wptr - get the IH ring buffer wptr
7247  *
7248  * @rdev: radeon_device pointer
7249  *
7250  * Get the IH ring buffer wptr from either the register
7251  * or the writeback memory buffer (CIK).  Also check for
7252  * ring buffer overflow and deal with it.
7253  * Used by cik_irq_process().
7254  * Returns the value of the wptr.
7255  */
7256 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7257 {
7258         u32 wptr, tmp;
7259
7260         if (rdev->wb.enabled)
7261                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7262         else
7263                 wptr = RREG32(IH_RB_WPTR);
7264
7265         if (wptr & RB_OVERFLOW) {
7266                 /* When a ring buffer overflow happens, start parsing interrupts
7267                  * from the last not-overwritten vector (wptr + 16). Hopefully
7268                  * this should allow us to catch up.
7269                  */
7270                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7271                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7272                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7273                 tmp = RREG32(IH_RB_CNTL);
7274                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7275                 WREG32(IH_RB_CNTL, tmp);
7276                 wptr &= ~RB_OVERFLOW;
7277         }
7278         return (wptr & rdev->ih.ptr_mask);
7279 }
7280
7281 /*        CIK IV Ring
7282  * Each IV ring entry is 128 bits:
7283  * [7:0]    - interrupt source id
7284  * [31:8]   - reserved
7285  * [59:32]  - interrupt source data
7286  * [63:60]  - reserved
7287  * [71:64]  - RINGID
7288  *            CP:
7289  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7290  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7291  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7292  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7293  *            PIPE_ID - ME0 0=3D
7294  *                    - ME1&2 compute dispatcher (4 pipes each)
7295  *            SDMA:
7296  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7297  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7298  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7299  * [79:72]  - VMID
7300  * [95:80]  - PASID
7301  * [127:96] - reserved
7302  */
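
/*
 * A minimal sketch of how the RINGID byte is decoded for CP sources,
 * matching the layout above (this mirrors the CP EOP handling in
 * cik_irq_process()):
 *
 *	me_id    = (ring_id & 0x60) >> 5;
 *	pipe_id  = (ring_id & 0x18) >> 3;
 *	queue_id = (ring_id & 0x07) >> 0;
 */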
7303 /**
7304  * cik_irq_process - interrupt handler
7305  *
7306  * @rdev: radeon_device pointer
7307  *
7308  * Interrupt handler (CIK).  Walk the IH ring,
7309  * ack interrupts and schedule work to handle
7310  * interrupt events.
7311  * Returns irq process return code.
7312  */
7313 int cik_irq_process(struct radeon_device *rdev)
7314 {
7315         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7316         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7317         u32 wptr;
7318         u32 rptr;
7319         u32 src_id, src_data, ring_id;
7320         u8 me_id, pipe_id, queue_id;
7321         u32 ring_index;
7322         bool queue_hotplug = false;
7323         bool queue_reset = false;
7324         u32 addr, status, mc_client;
7325         bool queue_thermal = false;
7326
7327         if (!rdev->ih.enabled || rdev->shutdown)
7328                 return IRQ_NONE;
7329
7330         wptr = cik_get_ih_wptr(rdev);
7331
7332 restart_ih:
7333         /* is somebody else already processing irqs? */
7334         if (atomic_xchg(&rdev->ih.lock, 1))
7335                 return IRQ_NONE;
7336
7337         rptr = rdev->ih.rptr;
7338         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7339
7340         /* Order reading of wptr vs. reading of IH ring data */
7341         rmb();
7342
7343         /* display interrupts */
7344         cik_irq_ack(rdev);
7345
7346         while (rptr != wptr) {
7347                 /* wptr/rptr are in bytes! */
7348                 ring_index = rptr / 4;
7349                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7350                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7351                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7352
7353                 switch (src_id) {
7354                 case 1: /* D1 vblank/vline */
7355                         switch (src_data) {
7356                         case 0: /* D1 vblank */
7357                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7358                                         if (rdev->irq.crtc_vblank_int[0]) {
7359                                                 drm_handle_vblank(rdev->ddev, 0);
7360                                                 rdev->pm.vblank_sync = true;
7361                                                 wake_up(&rdev->irq.vblank_queue);
7362                                         }
7363                                         if (atomic_read(&rdev->irq.pflip[0]))
7364                                                 radeon_crtc_handle_flip(rdev, 0);
7365                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7366                                         DRM_DEBUG("IH: D1 vblank\n");
7367                                 }
7368                                 break;
7369                         case 1: /* D1 vline */
7370                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7371                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7372                                         DRM_DEBUG("IH: D1 vline\n");
7373                                 }
7374                                 break;
7375                         default:
7376                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7377                                 break;
7378                         }
7379                         break;
7380                 case 2: /* D2 vblank/vline */
7381                         switch (src_data) {
7382                         case 0: /* D2 vblank */
7383                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7384                                         if (rdev->irq.crtc_vblank_int[1]) {
7385                                                 drm_handle_vblank(rdev->ddev, 1);
7386                                                 rdev->pm.vblank_sync = true;
7387                                                 wake_up(&rdev->irq.vblank_queue);
7388                                         }
7389                                         if (atomic_read(&rdev->irq.pflip[1]))
7390                                                 radeon_crtc_handle_flip(rdev, 1);
7391                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7392                                         DRM_DEBUG("IH: D2 vblank\n");
7393                                 }
7394                                 break;
7395                         case 1: /* D2 vline */
7396                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7397                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7398                                         DRM_DEBUG("IH: D2 vline\n");
7399                                 }
7400                                 break;
7401                         default:
7402                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7403                                 break;
7404                         }
7405                         break;
7406                 case 3: /* D3 vblank/vline */
7407                         switch (src_data) {
7408                         case 0: /* D3 vblank */
7409                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7410                                         if (rdev->irq.crtc_vblank_int[2]) {
7411                                                 drm_handle_vblank(rdev->ddev, 2);
7412                                                 rdev->pm.vblank_sync = true;
7413                                                 wake_up(&rdev->irq.vblank_queue);
7414                                         }
7415                                         if (atomic_read(&rdev->irq.pflip[2]))
7416                                                 radeon_crtc_handle_flip(rdev, 2);
7417                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7418                                         DRM_DEBUG("IH: D3 vblank\n");
7419                                 }
7420                                 break;
7421                         case 1: /* D3 vline */
7422                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7423                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7424                                         DRM_DEBUG("IH: D3 vline\n");
7425                                 }
7426                                 break;
7427                         default:
7428                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7429                                 break;
7430                         }
7431                         break;
7432                 case 4: /* D4 vblank/vline */
7433                         switch (src_data) {
7434                         case 0: /* D4 vblank */
7435                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7436                                         if (rdev->irq.crtc_vblank_int[3]) {
7437                                                 drm_handle_vblank(rdev->ddev, 3);
7438                                                 rdev->pm.vblank_sync = true;
7439                                                 wake_up(&rdev->irq.vblank_queue);
7440                                         }
7441                                         if (atomic_read(&rdev->irq.pflip[3]))
7442                                                 radeon_crtc_handle_flip(rdev, 3);
7443                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7444                                         DRM_DEBUG("IH: D4 vblank\n");
7445                                 }
7446                                 break;
7447                         case 1: /* D4 vline */
7448                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7449                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7450                                         DRM_DEBUG("IH: D4 vline\n");
7451                                 }
7452                                 break;
7453                         default:
7454                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7455                                 break;
7456                         }
7457                         break;
7458                 case 5: /* D5 vblank/vline */
7459                         switch (src_data) {
7460                         case 0: /* D5 vblank */
7461                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7462                                         if (rdev->irq.crtc_vblank_int[4]) {
7463                                                 drm_handle_vblank(rdev->ddev, 4);
7464                                                 rdev->pm.vblank_sync = true;
7465                                                 wake_up(&rdev->irq.vblank_queue);
7466                                         }
7467                                         if (atomic_read(&rdev->irq.pflip[4]))
7468                                                 radeon_crtc_handle_flip(rdev, 4);
7469                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7470                                         DRM_DEBUG("IH: D5 vblank\n");
7471                                 }
7472                                 break;
7473                         case 1: /* D5 vline */
7474                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7475                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7476                                         DRM_DEBUG("IH: D5 vline\n");
7477                                 }
7478                                 break;
7479                         default:
7480                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7481                                 break;
7482                         }
7483                         break;
7484                 case 6: /* D6 vblank/vline */
7485                         switch (src_data) {
7486                         case 0: /* D6 vblank */
7487                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7488                                         if (rdev->irq.crtc_vblank_int[5]) {
7489                                                 drm_handle_vblank(rdev->ddev, 5);
7490                                                 rdev->pm.vblank_sync = true;
7491                                                 wake_up(&rdev->irq.vblank_queue);
7492                                         }
7493                                         if (atomic_read(&rdev->irq.pflip[5]))
7494                                                 radeon_crtc_handle_flip(rdev, 5);
7495                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7496                                         DRM_DEBUG("IH: D6 vblank\n");
7497                                 }
7498                                 break;
7499                         case 1: /* D6 vline */
7500                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7501                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7502                                         DRM_DEBUG("IH: D6 vline\n");
7503                                 }
7504                                 break;
7505                         default:
7506                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7507                                 break;
7508                         }
7509                         break;
7510                 case 8: /* D1 page flip */
7511                 case 10: /* D2 page flip */
7512                 case 12: /* D3 page flip */
7513                 case 14: /* D4 page flip */
7514                 case 16: /* D5 page flip */
7515                 case 18: /* D6 page flip */
7516                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7517                         radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7518                         break;
7519                 case 42: /* HPD hotplug */
7520                         switch (src_data) {
7521                         case 0:
7522                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7523                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7524                                         queue_hotplug = true;
7525                                         DRM_DEBUG("IH: HPD1\n");
7526                                 }
7527                                 break;
7528                         case 1:
7529                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7530                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7531                                         queue_hotplug = true;
7532                                         DRM_DEBUG("IH: HPD2\n");
7533                                 }
7534                                 break;
7535                         case 2:
7536                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7537                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7538                                         queue_hotplug = true;
7539                                         DRM_DEBUG("IH: HPD3\n");
7540                                 }
7541                                 break;
7542                         case 3:
7543                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7544                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7545                                         queue_hotplug = true;
7546                                         DRM_DEBUG("IH: HPD4\n");
7547                                 }
7548                                 break;
7549                         case 4:
7550                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7551                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7552                                         queue_hotplug = true;
7553                                         DRM_DEBUG("IH: HPD5\n");
7554                                 }
7555                                 break;
7556                         case 5:
7557                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7558                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7559                                         queue_hotplug = true;
7560                                         DRM_DEBUG("IH: HPD6\n");
7561                                 }
7562                                 break;
7563                         default:
7564                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7565                                 break;
7566                         }
7567                         break;
7568                 case 124: /* UVD */
7569                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7570                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7571                         break;
7572                 case 146:
7573                 case 147:
7574                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7575                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7576                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7577                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7578                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7579                                 addr);
7580                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7581                                 status);
7582                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7583                         /* reset addr and status */
7584                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7585                         break;
7586                 case 176: /* GFX RB CP_INT */
7587                 case 177: /* GFX IB CP_INT */
7588                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7589                         break;
7590                 case 181: /* CP EOP event */
7591                         DRM_DEBUG("IH: CP EOP\n");
7592                         /* XXX check the bitfield order! */
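                             /* assumed layout: ring_id[6:5] = me, [4:3] = pipe, [2:0] = queue */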
7593                         me_id = (ring_id & 0x60) >> 5;
7594                         pipe_id = (ring_id & 0x18) >> 3;
7595                         queue_id = (ring_id & 0x7) >> 0;
7596                         switch (me_id) {
7597                         case 0:
7598                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7599                                 break;
7600                         case 1:
7601                         case 2:
7602                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7603                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7604                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7605                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7606                                 break;
7607                         }
7608                         break;
7609                 case 184: /* CP Privileged reg access */
7610                         DRM_ERROR("Illegal register access in command stream\n");
7611                         /* XXX check the bitfield order! */
7612                         me_id = (ring_id & 0x60) >> 5;
7613                         pipe_id = (ring_id & 0x18) >> 3;
7614                         queue_id = (ring_id & 0x7) >> 0;
7615                         switch (me_id) {
7616                         case 0:
7617                                 /* This results in a full GPU reset, but all we need to do is soft
7618                                  * reset the CP for gfx
7619                                  */
7620                                 queue_reset = true;
7621                                 break;
7622                         case 1:
7623                                 /* XXX compute */
7624                                 queue_reset = true;
7625                                 break;
7626                         case 2:
7627                                 /* XXX compute */
7628                                 queue_reset = true;
7629                                 break;
7630                         }
7631                         break;
7632                 case 185: /* CP Privileged inst */
7633                         DRM_ERROR("Illegal instruction in command stream\n");
7634                         /* XXX check the bitfield order! */
7635                         me_id = (ring_id & 0x60) >> 5;
7636                         pipe_id = (ring_id & 0x18) >> 3;
7637                         queue_id = (ring_id & 0x7) >> 0;
7638                         switch (me_id) {
7639                         case 0:
7640                                 /* This results in a full GPU reset, but all we need to do is soft
7641                                  * reset the CP for gfx
7642                                  */
7643                                 queue_reset = true;
7644                                 break;
7645                         case 1:
7646                                 /* XXX compute */
7647                                 queue_reset = true;
7648                                 break;
7649                         case 2:
7650                                 /* XXX compute */
7651                                 queue_reset = true;
7652                                 break;
7653                         }
7654                         break;
7655                 case 224: /* SDMA trap event */
7656                         /* XXX check the bitfield order! */
7657                         me_id = (ring_id & 0x3) >> 0;
7658                         queue_id = (ring_id & 0xc) >> 2;
7659                         DRM_DEBUG("IH: SDMA trap\n");
7660                         switch (me_id) {
7661                         case 0:
7662                                 switch (queue_id) {
7663                                 case 0:
7664                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7665                                         break;
7666                                 case 1:
7667                                         /* XXX compute */
7668                                         break;
7669                                 case 2:
7670                                         /* XXX compute */
7671                                         break;
7672                                 }
7673                                 break;
7674                         case 1:
7675                                 switch (queue_id) {
7676                                 case 0:
7677                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7678                                         break;
7679                                 case 1:
7680                                         /* XXX compute */
7681                                         break;
7682                                 case 2:
7683                                         /* XXX compute */
7684                                         break;
7685                                 }
7686                                 break;
7687                         }
7688                         break;
7689                 case 230: /* thermal low to high */
7690                         DRM_DEBUG("IH: thermal low to high\n");
7691                         rdev->pm.dpm.thermal.high_to_low = false;
7692                         queue_thermal = true;
7693                         break;
7694                 case 231: /* thermal high to low */
7695                         DRM_DEBUG("IH: thermal high to low\n");
7696                         rdev->pm.dpm.thermal.high_to_low = true;
7697                         queue_thermal = true;
7698                         break;
7699                 case 233: /* GUI IDLE */
7700                         DRM_DEBUG("IH: GUI idle\n");
7701                         break;
7702                 case 241: /* SDMA Privileged inst */
7703                 case 247: /* SDMA Privileged inst */
7704                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7705                         /* XXX check the bitfield order! */
7706                         me_id = (ring_id & 0x3) >> 0;
7707                         queue_id = (ring_id & 0xc) >> 2;
7708                         switch (me_id) {
7709                         case 0:
7710                                 switch (queue_id) {
7711                                 case 0:
7712                                         queue_reset = true;
7713                                         break;
7714                                 case 1:
7715                                         /* XXX compute */
7716                                         queue_reset = true;
7717                                         break;
7718                                 case 2:
7719                                         /* XXX compute */
7720                                         queue_reset = true;
7721                                         break;
7722                                 }
7723                                 break;
7724                         case 1:
7725                                 switch (queue_id) {
7726                                 case 0:
7727                                         queue_reset = true;
7728                                         break;
7729                                 case 1:
7730                                         /* XXX compute */
7731                                         queue_reset = true;
7732                                         break;
7733                                 case 2:
7734                                         /* XXX compute */
7735                                         queue_reset = true;
7736                                         break;
7737                                 }
7738                                 break;
7739                         }
7740                         break;
7741                 default:
7742                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7743                         break;
7744                 }
7745
7746                 /* wptr/rptr are in bytes! */
7747                 rptr += 16;
7748                 rptr &= rdev->ih.ptr_mask;
7749         }
7750         if (queue_hotplug)
7751                 schedule_work(&rdev->hotplug_work);
7752         if (queue_reset)
7753                 schedule_work(&rdev->reset_work);
7754         if (queue_thermal)
7755                 schedule_work(&rdev->pm.dpm.thermal.work);
7756         rdev->ih.rptr = rptr;
7757         WREG32(IH_RB_RPTR, rdev->ih.rptr);
7758         atomic_set(&rdev->ih.lock, 0);
7759
7760         /* make sure wptr hasn't changed while processing */
7761         wptr = cik_get_ih_wptr(rdev);
7762         if (wptr != rptr)
7763                 goto restart_ih;
7764
7765         return IRQ_HANDLED;
7766 }
7767
7768 /*
7769  * startup/shutdown callbacks
7770  */
7771 /**
7772  * cik_startup - program the asic to a functional state
7773  *
7774  * @rdev: radeon_device pointer
7775  *
7776  * Programs the asic to a functional state (CIK).
7777  * Called by cik_init() and cik_resume().
7778  * Returns 0 for success, error for failure.
7779  */
7780 static int cik_startup(struct radeon_device *rdev)
7781 {
7782         struct radeon_ring *ring;
7783         u32 nop;
7784         int r;
7785
7786         /* enable pcie gen2/3 link */
7787         cik_pcie_gen3_enable(rdev);
7788         /* enable aspm */
7789         cik_program_aspm(rdev);
7790
7791         /* scratch needs to be initialized before MC */
7792         r = r600_vram_scratch_init(rdev);
7793         if (r)
7794                 return r;
7795
7796         cik_mc_program(rdev);
7797
7798         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7799                 r = ci_mc_load_microcode(rdev);
7800                 if (r) {
7801                         DRM_ERROR("Failed to load MC firmware!\n");
7802                         return r;
7803                 }
7804         }
7805
7806         r = cik_pcie_gart_enable(rdev);
7807         if (r)
7808                 return r;
7809         cik_gpu_init(rdev);
7810
7811         /* allocate rlc buffers */
7812         if (rdev->flags & RADEON_IS_IGP) {
7813                 if (rdev->family == CHIP_KAVERI) {
7814                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7815                         rdev->rlc.reg_list_size =
7816                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7817                 } else {
7818                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7819                         rdev->rlc.reg_list_size =
7820                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7821                 }
7822         }
7823         rdev->rlc.cs_data = ci_cs_data;
7824         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7825         r = sumo_rlc_init(rdev);
7826         if (r) {
7827                 DRM_ERROR("Failed to init rlc BOs!\n");
7828                 return r;
7829         }
7830
7831         /* allocate wb buffer */
7832         r = radeon_wb_init(rdev);
7833         if (r)
7834                 return r;
7835
7836         /* allocate mec buffers */
7837         r = cik_mec_init(rdev);
7838         if (r) {
7839                 DRM_ERROR("Failed to init MEC BOs!\n");
7840                 return r;
7841         }
7842
7843         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7844         if (r) {
7845                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7846                 return r;
7847         }
7848
7849         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7850         if (r) {
7851                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7852                 return r;
7853         }
7854
7855         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7856         if (r) {
7857                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7858                 return r;
7859         }
7860
7861         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7862         if (r) {
7863                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7864                 return r;
7865         }
7866
7867         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7868         if (r) {
7869                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7870                 return r;
7871         }
7872
7873         r = radeon_uvd_resume(rdev);
7874         if (!r) {
7875                 r = uvd_v4_2_resume(rdev);
7876                 if (!r) {
7877                         r = radeon_fence_driver_start_ring(rdev,
7878                                                            R600_RING_TYPE_UVD_INDEX);
7879                         if (r)
7880                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7881                 }
7882         }
7883         if (r)
7884                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7885
7886         /* Enable IRQ */
7887         if (!rdev->irq.installed) {
7888                 r = radeon_irq_kms_init(rdev);
7889                 if (r)
7890                         return r;
7891         }
7892
7893         r = cik_irq_init(rdev);
7894         if (r) {
7895                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7896                 radeon_irq_kms_fini(rdev);
7897                 return r;
7898         }
7899         cik_irq_set(rdev);
7900
7901         if (rdev->family == CHIP_HAWAII) {
7902                 nop = RADEON_CP_PACKET2;
7903         } else {
7904                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
7905         }
7906
7907         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7908         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7909                              nop);
7910         if (r)
7911                 return r;
7912
7913         /* set up the compute queues */
7914         /* type-2 packets are deprecated on MEC, use type-3 instead */
7915         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7916         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7917                              nop);
7918         if (r)
7919                 return r;
7920         ring->me = 1; /* first MEC */
7921         ring->pipe = 0; /* first pipe */
7922         ring->queue = 0; /* first queue */
7923         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7924
7925         /* type-2 packets are deprecated on MEC, use type-3 instead */
7926         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7927         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7928                              nop);
7929         if (r)
7930                 return r;
7931         /* dGPUs only have 1 MEC */
7932         ring->me = 1; /* first MEC */
7933         ring->pipe = 0; /* first pipe */
7934         ring->queue = 1; /* second queue */
7935         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7936
7937         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7938         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7939                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7940         if (r)
7941                 return r;
7942
7943         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7944         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7945                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7946         if (r)
7947                 return r;
7948
7949         r = cik_cp_resume(rdev);
7950         if (r)
7951                 return r;
7952
7953         r = cik_sdma_resume(rdev);
7954         if (r)
7955                 return r;
7956
7957         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7958         if (ring->ring_size) {
7959                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7960                                      RADEON_CP_PACKET2);
7961                 if (!r)
7962                         r = uvd_v1_0_init(rdev);
7963                 if (r)
7964                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7965         }
7966
7967         r = radeon_ib_pool_init(rdev);
7968         if (r) {
7969                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7970                 return r;
7971         }
7972
7973         r = radeon_vm_manager_init(rdev);
7974         if (r) {
7975                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7976                 return r;
7977         }
7978
7979         r = dce6_audio_init(rdev);
7980         if (r)
7981                 return r;
7982
7983         return 0;
7984 }
7985
7986 /**
7987  * cik_resume - resume the asic to a functional state
7988  *
7989  * @rdev: radeon_device pointer
7990  *
7991  * Programs the asic to a functional state (CIK).
7992  * Called at resume.
7993  * Returns 0 for success, error for failure.
7994  */
7995 int cik_resume(struct radeon_device *rdev)
7996 {
7997         int r;
7998
7999         /* post card */
8000         atom_asic_init(rdev->mode_info.atom_context);
8001
8002         /* init golden registers */
8003         cik_init_golden_registers(rdev);
8004
8005         if (rdev->pm.pm_method == PM_METHOD_DPM)
8006                 radeon_pm_resume(rdev);
8007
8008         rdev->accel_working = true;
8009         r = cik_startup(rdev);
8010         if (r) {
8011                 DRM_ERROR("cik startup failed on resume\n");
8012                 rdev->accel_working = false;
8013                 return r;
8014         }
8015
8016         return r;
8017
8018 }
8019
8020 /**
8021  * cik_suspend - suspend the asic
8022  *
8023  * @rdev: radeon_device pointer
8024  *
8025  * Bring the chip into a state suitable for suspend (CIK).
8026  * Called at suspend.
8027  * Returns 0 for success.
8028  */
8029 int cik_suspend(struct radeon_device *rdev)
8030 {
8031         radeon_pm_suspend(rdev);
8032         dce6_audio_fini(rdev);
8033         radeon_vm_manager_fini(rdev);
8034         cik_cp_enable(rdev, false);
8035         cik_sdma_enable(rdev, false);
8036         uvd_v1_0_fini(rdev);
8037         radeon_uvd_suspend(rdev);
8038         cik_fini_pg(rdev);
8039         cik_fini_cg(rdev);
8040         cik_irq_suspend(rdev);
8041         radeon_wb_disable(rdev);
8042         cik_pcie_gart_disable(rdev);
8043         return 0;
8044 }
8045
8046 /* The plan is to move initialization into this function and use
8047  * helper functions so that radeon_device_init does little more
8048  * than call asic-specific functions. This should also
8049  * allow us to remove a bunch of callback functions
8050  * like vram_info.
8051  */
8052 /**
8053  * cik_init - asic specific driver and hw init
8054  *
8055  * @rdev: radeon_device pointer
8056  *
8057  * Setup asic specific driver variables and program the hw
8058  * to a functional state (CIK).
8059  * Called at driver startup.
8060  * Returns 0 for success, errors for failure.
8061  */
8062 int cik_init(struct radeon_device *rdev)
8063 {
8064         struct radeon_ring *ring;
8065         int r;
8066
8067         /* Read BIOS */
8068         if (!radeon_get_bios(rdev)) {
8069                 if (ASIC_IS_AVIVO(rdev))
8070                         return -EINVAL;
8071         }
8072         /* Must be an ATOMBIOS */
8073         if (!rdev->is_atom_bios) {
8074                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8075                 return -EINVAL;
8076         }
8077         r = radeon_atombios_init(rdev);
8078         if (r)
8079                 return r;
8080
8081         /* Post card if necessary */
8082         if (!radeon_card_posted(rdev)) {
8083                 if (!rdev->bios) {
8084                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8085                         return -EINVAL;
8086                 }
8087                 DRM_INFO("GPU not posted. posting now...\n");
8088                 atom_asic_init(rdev->mode_info.atom_context);
8089         }
8090         /* init golden registers */
8091         cik_init_golden_registers(rdev);
8092         /* Initialize scratch registers */
8093         cik_scratch_init(rdev);
8094         /* Initialize surface registers */
8095         radeon_surface_init(rdev);
8096         /* Initialize clocks */
8097         radeon_get_clock_info(rdev->ddev);
8098
8099         /* Fence driver */
8100         r = radeon_fence_driver_init(rdev);
8101         if (r)
8102                 return r;
8103
8104         /* initialize memory controller */
8105         r = cik_mc_init(rdev);
8106         if (r)
8107                 return r;
8108         /* Memory manager */
8109         r = radeon_bo_init(rdev);
8110         if (r)
8111                 return r;
8112
8113         if (rdev->flags & RADEON_IS_IGP) {
8114                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8115                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8116                         r = cik_init_microcode(rdev);
8117                         if (r) {
8118                                 DRM_ERROR("Failed to load firmware!\n");
8119                                 return r;
8120                         }
8121                 }
8122         } else {
8123                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8124                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8125                     !rdev->mc_fw) {
8126                         r = cik_init_microcode(rdev);
8127                         if (r) {
8128                                 DRM_ERROR("Failed to load firmware!\n");
8129                                 return r;
8130                         }
8131                 }
8132         }
8133
8134         /* Initialize power management */
8135         radeon_pm_init(rdev);
8136
8137         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8138         ring->ring_obj = NULL;
8139         r600_ring_init(rdev, ring, 1024 * 1024);
8140
8141         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8142         ring->ring_obj = NULL;
8143         r600_ring_init(rdev, ring, 1024 * 1024);
8144         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8145         if (r)
8146                 return r;
8147
8148         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8149         ring->ring_obj = NULL;
8150         r600_ring_init(rdev, ring, 1024 * 1024);
8151         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8152         if (r)
8153                 return r;
8154
8155         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8156         ring->ring_obj = NULL;
8157         r600_ring_init(rdev, ring, 256 * 1024);
8158
8159         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8160         ring->ring_obj = NULL;
8161         r600_ring_init(rdev, ring, 256 * 1024);
8162
8163         r = radeon_uvd_init(rdev);
8164         if (!r) {
8165                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8166                 ring->ring_obj = NULL;
8167                 r600_ring_init(rdev, ring, 4096);
8168         }
8169
8170         rdev->ih.ring_obj = NULL;
8171         r600_ih_ring_init(rdev, 64 * 1024);
8172
8173         r = r600_pcie_gart_init(rdev);
8174         if (r)
8175                 return r;
8176
8177         rdev->accel_working = true;
8178         r = cik_startup(rdev);
8179         if (r) {
8180                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8181                 cik_cp_fini(rdev);
8182                 cik_sdma_fini(rdev);
8183                 cik_irq_fini(rdev);
8184                 sumo_rlc_fini(rdev);
8185                 cik_mec_fini(rdev);
8186                 radeon_wb_fini(rdev);
8187                 radeon_ib_pool_fini(rdev);
8188                 radeon_vm_manager_fini(rdev);
8189                 radeon_irq_kms_fini(rdev);
8190                 cik_pcie_gart_fini(rdev);
8191                 rdev->accel_working = false;
8192         }
8193
8194         /* Don't start up if the MC ucode is missing.
8195          * The default clocks and voltages before the MC ucode
8196  * is loaded are not sufficient for advanced operations.
8197          */
8198         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8199                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8200                 return -EINVAL;
8201         }
8202
8203         return 0;
8204 }
8205
8206 /**
8207  * cik_fini - asic specific driver and hw fini
8208  *
8209  * @rdev: radeon_device pointer
8210  *
8211  * Tear down the asic specific driver variables and program the hw
8212  * to an idle state (CIK).
8213  * Called at driver unload.
8214  */
8215 void cik_fini(struct radeon_device *rdev)
8216 {
8217         radeon_pm_fini(rdev);
8218         cik_cp_fini(rdev);
8219         cik_sdma_fini(rdev);
8220         cik_fini_pg(rdev);
8221         cik_fini_cg(rdev);
8222         cik_irq_fini(rdev);
8223         sumo_rlc_fini(rdev);
8224         cik_mec_fini(rdev);
8225         radeon_wb_fini(rdev);
8226         radeon_vm_manager_fini(rdev);
8227         radeon_ib_pool_fini(rdev);
8228         radeon_irq_kms_fini(rdev);
8229         uvd_v1_0_fini(rdev);
8230         radeon_uvd_fini(rdev);
8231         cik_pcie_gart_fini(rdev);
8232         r600_vram_scratch_fini(rdev);
8233         radeon_gem_fini(rdev);
8234         radeon_fence_driver_fini(rdev);
8235         radeon_bo_fini(rdev);
8236         radeon_atombios_fini(rdev);
8237         kfree(rdev->bios);
8238         rdev->bios = NULL;
8239 }
8240
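     /**
      * dce8_program_fmt - set up the FMT block for an encoder
      *
      * @encoder: the drm encoder being programmed
      *
      * Programs truncation or spatial dithering in the FMT block of the
      * crtc driving this encoder, based on the connector's bit depth and
      * dither setting (CIK).  LVDS/eDP (handled by the atom tables) and
      * analog DACs are skipped.
      */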
8241 void dce8_program_fmt(struct drm_encoder *encoder)
8242 {
8243         struct drm_device *dev = encoder->dev;
8244         struct radeon_device *rdev = dev->dev_private;
8245         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8246         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8247         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8248         int bpc = 0;
8249         u32 tmp = 0;
8250         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8251
8252         if (connector) {
8253                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8254                 bpc = radeon_get_monitor_bpc(connector);
8255                 dither = radeon_connector->dither;
8256         }
8257
8258         /* LVDS/eDP FMT is set up by atom */
8259         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8260                 return;
8261
8262         /* not needed for analog */
8263         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8264             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8265                 return;
8266
8267         if (bpc == 0)
8268                 return;
8269
8270         switch (bpc) {
8271         case 6:
8272                 if (dither == RADEON_FMT_DITHER_ENABLE)
8273                         /* XXX sort out optimal dither settings */
8274                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8275                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8276                 else
8277                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8278                 break;
8279         case 8:
8280                 if (dither == RADEON_FMT_DITHER_ENABLE)
8281                         /* XXX sort out optimal dither settings */
8282                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8283                                 FMT_RGB_RANDOM_ENABLE |
8284                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8285                 else
8286                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8287                 break;
8288         case 10:
8289                 if (dither == RADEON_FMT_DITHER_ENABLE)
8290                         /* XXX sort out optimal dither settings */
8291                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8292                                 FMT_RGB_RANDOM_ENABLE |
8293                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8294                 else
8295                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8296                 break;
8297         default:
8298                 /* not needed */
8299                 break;
8300         }
8301
8302         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8303 }
8304
8305 /* display watermark setup */
8306 /**
8307  * dce8_line_buffer_adjust - Set up the line buffer
8308  *
8309  * @rdev: radeon_device pointer
8310  * @radeon_crtc: the selected display controller
8311  * @mode: the current display mode on the selected display
8312  * controller
8313  *
8314  * Set up the line buffer allocation for
8315  * the selected display controller (CIK).
8316  * Returns the line buffer size in pixels.
8317  */
8318 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8319                                    struct radeon_crtc *radeon_crtc,
8320                                    struct drm_display_mode *mode)
8321 {
8322         u32 tmp, buffer_alloc, i;
8323         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8324         /*
8325          * Line Buffer Setup
8326          * There are 6 line buffers, one for each display controller.
8327          * There are 3 partitions per LB. Select the number of partitions
8328          * to enable based on the display width.  For display widths larger
8329          * than 4096, you need to use 2 display controllers and combine
8330          * them using the stereo blender.
8331          */
8332         if (radeon_crtc->base.enabled && mode) {
8333                 if (mode->crtc_hdisplay < 1920) {
8334                         tmp = 1;
8335                         buffer_alloc = 2;
8336                 } else if (mode->crtc_hdisplay < 2560) {
8337                         tmp = 2;
8338                         buffer_alloc = 2;
8339                 } else if (mode->crtc_hdisplay < 4096) {
8340                         tmp = 0;
8341                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8342                 } else {
8343                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8344                         tmp = 0;
8345                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8346                 }
8347         } else {
8348                 tmp = 1;
8349                 buffer_alloc = 0;
8350         }
8351
8352         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8353                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8354
8355         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8356                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8357         for (i = 0; i < rdev->usec_timeout; i++) {
8358                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8359                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8360                         break;
8361                 udelay(1);
8362         }
8363
8364         if (radeon_crtc->base.enabled && mode) {
8365                 switch (tmp) {
8366                 case 0:
8367                 default:
8368                         return 4096 * 2;
8369                 case 1:
8370                         return 1920 * 2;
8371                 case 2:
8372                         return 2560 * 2;
8373                 }
8374         }
8375
8376         /* controller not enabled, so no lb used */
8377         return 0;
8378 }
8379
8380 /**
8381  * cik_get_number_of_dram_channels - get the number of dram channels
8382  *
8383  * @rdev: radeon_device pointer
8384  *
8385  * Look up the number of video ram channels (CIK).
8386  * Used for display watermark bandwidth calculations
8387  * Returns the number of dram channels
8388  */
8389 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8390 {
8391         u32 tmp = RREG32(MC_SHARED_CHMAP);
8392
8393         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8394         case 0:
8395         default:
8396                 return 1;
8397         case 1:
8398                 return 2;
8399         case 2:
8400                 return 4;
8401         case 3:
8402                 return 8;
8403         case 4:
8404                 return 3;
8405         case 5:
8406                 return 6;
8407         case 6:
8408                 return 10;
8409         case 7:
8410                 return 12;
8411         case 8:
8412                 return 16;
8413         }
8414 }
8415
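     /* inputs for the DCE8 display watermark calculations, one set per head */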
8416 struct dce8_wm_params {
8417         u32 dram_channels; /* number of dram channels */
8418         u32 yclk;          /* bandwidth per dram data pin in kHz */
8419         u32 sclk;          /* engine clock in kHz */
8420         u32 disp_clk;      /* display clock in kHz */
8421         u32 src_width;     /* viewport width */
8422         u32 active_time;   /* active display time in ns */
8423         u32 blank_time;    /* blank time in ns */
8424         bool interlaced;    /* mode is interlaced */
8425         fixed20_12 vsc;    /* vertical scale ratio */
8426         u32 num_heads;     /* number of active crtcs */
8427         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8428         u32 lb_size;       /* line buffer allocated to pipe */
8429         u32 vtaps;         /* vertical scaler taps */
8430 };
8431
8432 /**
8433  * dce8_dram_bandwidth - get the dram bandwidth
8434  *
8435  * @wm: watermark calculation data
8436  *
8437  * Calculate the raw dram bandwidth (CIK).
8438  * Used for display watermark bandwidth calculations
8439  * Returns the dram bandwidth in MBytes/s
8440  */
8441 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8442 {
8443         /* Calculate raw DRAM Bandwidth */
8444         fixed20_12 dram_efficiency; /* 0.7 */
8445         fixed20_12 yclk, dram_channels, bandwidth;
8446         fixed20_12 a;
8447
8448         a.full = dfixed_const(1000);
8449         yclk.full = dfixed_const(wm->yclk);
8450         yclk.full = dfixed_div(yclk, a);
8451         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8452         a.full = dfixed_const(10);
8453         dram_efficiency.full = dfixed_const(7);
8454         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8455         bandwidth.full = dfixed_mul(dram_channels, yclk);
8456         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8457
8458         return dfixed_trunc(bandwidth);
8459 }
8460
8461 /**
8462  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8463  *
8464  * @wm: watermark calculation data
8465  *
8466  * Calculate the dram bandwidth used for display (CIK).
8467  * Used for display watermark bandwidth calculations
8468  * Returns the dram bandwidth for display in MBytes/s
8469  */
8470 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8471 {
8472         /* Calculate DRAM Bandwidth and the part allocated to display. */
8473         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8474         fixed20_12 yclk, dram_channels, bandwidth;
8475         fixed20_12 a;
8476
8477         a.full = dfixed_const(1000);
8478         yclk.full = dfixed_const(wm->yclk);
8479         yclk.full = dfixed_div(yclk, a);
8480         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8481         a.full = dfixed_const(10);
8482         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8483         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8484         bandwidth.full = dfixed_mul(dram_channels, yclk);
8485         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8486
8487         return dfixed_trunc(bandwidth);
8488 }
8489
8490 /**
8491  * dce8_data_return_bandwidth - get the data return bandwidth
8492  *
8493  * @wm: watermark calculation data
8494  *
8495  * Calculate the data return bandwidth used for display (CIK).
8496  * Used for display watermark bandwidth calculations
8497  * Returns the data return bandwidth in MBytes/s
8498  */
8499 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8500 {
8501         /* Calculate the display Data return Bandwidth */
8502         fixed20_12 return_efficiency; /* 0.8 */
8503         fixed20_12 sclk, bandwidth;
8504         fixed20_12 a;
8505
8506         a.full = dfixed_const(1000);
8507         sclk.full = dfixed_const(wm->sclk);
8508         sclk.full = dfixed_div(sclk, a);
8509         a.full = dfixed_const(10);
8510         return_efficiency.full = dfixed_const(8);
8511         return_efficiency.full = dfixed_div(return_efficiency, a);
8512         a.full = dfixed_const(32);
8513         bandwidth.full = dfixed_mul(a, sclk);
8514         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8515
8516         return dfixed_trunc(bandwidth);
8517 }
8518
8519 /**
8520  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8521  *
8522  * @wm: watermark calculation data
8523  *
8524  * Calculate the dmif bandwidth used for display (CIK).
8525  * Used for display watermark bandwidth calculations
8526  * Returns the dmif bandwidth in MBytes/s
8527  */
8528 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8529 {
8530         /* Calculate the DMIF Request Bandwidth */
8531         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8532         fixed20_12 disp_clk, bandwidth;
8533         fixed20_12 a, b;
8534
8535         a.full = dfixed_const(1000);
8536         disp_clk.full = dfixed_const(wm->disp_clk);
8537         disp_clk.full = dfixed_div(disp_clk, a);
8538         a.full = dfixed_const(32);
8539         b.full = dfixed_mul(a, disp_clk);
8540
8541         a.full = dfixed_const(10);
8542         disp_clk_request_efficiency.full = dfixed_const(8);
8543         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8544
8545         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8546
8547         return dfixed_trunc(bandwidth);
8548 }
8549
8550 /**
8551  * dce8_available_bandwidth - get the min available bandwidth
8552  *
8553  * @wm: watermark calculation data
8554  *
8555  * Calculate the min available bandwidth used for display (CIK).
8556  * Used for display watermark bandwidth calculations
8557  * Returns the min available bandwidth in MBytes/s
8558  */
8559 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8560 {
8561         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8562         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8563         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8564         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8565
8566         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8567 }
8568
8569 /**
8570  * dce8_average_bandwidth - get the average available bandwidth
8571  *
8572  * @wm: watermark calculation data
8573  *
8574  * Calculate the average available bandwidth used for display (CIK).
8575  * Used for display watermark bandwidth calculations
8576  * Returns the average available bandwidth in MBytes/s
8577  */
8578 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8579 {
8580         /* Calculate the display mode Average Bandwidth
8581          * DisplayMode should contain the source and destination dimensions,
8582          * timing, etc.
8583          */
8584         fixed20_12 bpp;
8585         fixed20_12 line_time;
8586         fixed20_12 src_width;
8587         fixed20_12 bandwidth;
8588         fixed20_12 a;
8589
8590         a.full = dfixed_const(1000);
8591         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8592         line_time.full = dfixed_div(line_time, a);
8593         bpp.full = dfixed_const(wm->bytes_per_pixel);
8594         src_width.full = dfixed_const(wm->src_width);
8595         bandwidth.full = dfixed_mul(src_width, bpp);
8596         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8597         bandwidth.full = dfixed_div(bandwidth, line_time);
8598
8599         return dfixed_trunc(bandwidth);
8600 }
8601
8602 /**
8603  * dce8_latency_watermark - get the latency watermark
8604  *
8605  * @wm: watermark calculation data
8606  *
8607  * Calculate the latency watermark (CIK).
8608  * Used for display watermark bandwidth calculations
8609  * Returns the latency watermark in ns
8610  */
8611 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8612 {
8613         /* First calculate the latency in ns */
8614         u32 mc_latency = 2000; /* 2000 ns. */
8615         u32 available_bandwidth = dce8_available_bandwidth(wm);
8616         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8617         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8618         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8619         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8620                 (wm->num_heads * cursor_line_pair_return_time);
8621         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8622         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8623         u32 tmp, dmif_size = 12288;
8624         fixed20_12 a, b, c;
8625
8626         if (wm->num_heads == 0)
8627                 return 0;
8628
8629         a.full = dfixed_const(2);
8630         b.full = dfixed_const(1);
8631         if ((wm->vsc.full > a.full) ||
8632             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8633             (wm->vtaps >= 5) ||
8634             ((wm->vsc.full >= a.full) && wm->interlaced))
8635                 max_src_lines_per_dst_line = 4;
8636         else
8637                 max_src_lines_per_dst_line = 2;
8638
8639         a.full = dfixed_const(available_bandwidth);
8640         b.full = dfixed_const(wm->num_heads);
8641         a.full = dfixed_div(a, b);
8642
8643         b.full = dfixed_const(mc_latency + 512);
8644         c.full = dfixed_const(wm->disp_clk);
8645         b.full = dfixed_div(b, c);
8646
8647         c.full = dfixed_const(dmif_size);
8648         b.full = dfixed_div(c, b);
8649
8650         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8651
8652         b.full = dfixed_const(1000);
8653         c.full = dfixed_const(wm->disp_clk);
8654         b.full = dfixed_div(c, b);
8655         c.full = dfixed_const(wm->bytes_per_pixel);
8656         b.full = dfixed_mul(b, c);
8657
8658         lb_fill_bw = min(tmp, dfixed_trunc(b));
8659
8660         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8661         b.full = dfixed_const(1000);
8662         c.full = dfixed_const(lb_fill_bw);
8663         b.full = dfixed_div(c, b);
8664         a.full = dfixed_div(a, b);
8665         line_fill_time = dfixed_trunc(a);
8666
8667         if (line_fill_time < wm->active_time)
8668                 return latency;
8669         else
8670                 return latency + (line_fill_time - wm->active_time);
8671
8672 }
8673
8674 /**
8675  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8676  * average and available dram bandwidth
8677  *
8678  * @wm: watermark calculation data
8679  *
8680  * Check if the display average bandwidth fits in the display
8681  * dram bandwidth (CIK).
8682  * Used for display watermark bandwidth calculations
8683  * Returns true if the display fits, false if not.
8684  */
8685 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8686 {
8687         if (dce8_average_bandwidth(wm) <=
8688             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8689                 return true;
8690         else
8691                 return false;
8692 }
8693
8694 /**
8695  * dce8_average_bandwidth_vs_available_bandwidth - check
8696  * average and available bandwidth
8697  *
8698  * @wm: watermark calculation data
8699  *
8700  * Check if the display average bandwidth fits in the display
8701  * available bandwidth (CIK).
8702  * Used for display watermark bandwidth calculations
8703  * Returns true if the display fits, false if not.
8704  */
8705 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8706 {
8707         if (dce8_average_bandwidth(wm) <=
8708             (dce8_available_bandwidth(wm) / wm->num_heads))
8709                 return true;
8710         else
8711                 return false;
8712 }
8713
8714 /**
8715  * dce8_check_latency_hiding - check latency hiding
8716  *
8717  * @wm: watermark calculation data
8718  *
8719  * Check latency hiding (CIK).
8720  * Used for display watermark bandwidth calculations
8721  * Returns true if the display fits, false if not.
8722  */
8723 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8724 {
8725         u32 lb_partitions = wm->lb_size / wm->src_width;
8726         u32 line_time = wm->active_time + wm->blank_time;
8727         u32 latency_tolerant_lines;
8728         u32 latency_hiding;
8729         fixed20_12 a;
8730
8731         a.full = dfixed_const(1);
8732         if (wm->vsc.full > a.full)
8733                 latency_tolerant_lines = 1;
8734         else {
8735                 if (lb_partitions <= (wm->vtaps + 1))
8736                         latency_tolerant_lines = 1;
8737                 else
8738                         latency_tolerant_lines = 2;
8739         }
8740
8741         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8742
8743         if (dce8_latency_watermark(wm) <= latency_hiding)
8744                 return true;
8745         else
8746                 return false;
8747 }
8748
8749 /**
8750  * dce8_program_watermarks - program display watermarks
8751  *
8752  * @rdev: radeon_device pointer
8753  * @radeon_crtc: the selected display controller
8754  * @lb_size: line buffer size
8755  * @num_heads: number of display controllers in use
8756  *
8757  * Calculate and program the display watermarks for the
8758  * selected display controller (CIK).
8759  */
8760 static void dce8_program_watermarks(struct radeon_device *rdev,
8761                                     struct radeon_crtc *radeon_crtc,
8762                                     u32 lb_size, u32 num_heads)
8763 {
8764         struct drm_display_mode *mode = &radeon_crtc->base.mode;
8765         struct dce8_wm_params wm_low, wm_high;
8766         u32 pixel_period;
8767         u32 line_time = 0;
8768         u32 latency_watermark_a = 0, latency_watermark_b = 0;
8769         u32 tmp, wm_mask;
8770
8771         if (radeon_crtc->base.enabled && num_heads && mode) {
8772                 pixel_period = 1000000 / (u32)mode->clock;
8773                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8774
8775                 /* watermark for high clocks */
8776                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8777                     rdev->pm.dpm_enabled) {
8778                         wm_high.yclk =
8779                                 radeon_dpm_get_mclk(rdev, false) * 10;
8780                         wm_high.sclk =
8781                                 radeon_dpm_get_sclk(rdev, false) * 10;
8782                 } else {
8783                         wm_high.yclk = rdev->pm.current_mclk * 10;
8784                         wm_high.sclk = rdev->pm.current_sclk * 10;
8785                 }
8786
8787                 wm_high.disp_clk = mode->clock;
8788                 wm_high.src_width = mode->crtc_hdisplay;
8789                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8790                 wm_high.blank_time = line_time - wm_high.active_time;
8791                 wm_high.interlaced = false;
8792                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8793                         wm_high.interlaced = true;
8794                 wm_high.vsc = radeon_crtc->vsc;
8795                 wm_high.vtaps = 1;
8796                 if (radeon_crtc->rmx_type != RMX_OFF)
8797                         wm_high.vtaps = 2;
8798                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8799                 wm_high.lb_size = lb_size;
8800                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8801                 wm_high.num_heads = num_heads;
8802
8803                 /* set for high clocks */
8804                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8805
8806                 /* possibly force display priority to high */
8807                 /* should really do this at mode validation time... */
8808                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8809                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8810                     !dce8_check_latency_hiding(&wm_high) ||
8811                     (rdev->disp_priority == 2)) {
8812                         DRM_DEBUG_KMS("force priority to high\n");
8813                 }
8814
8815                 /* watermark for low clocks */
8816                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8817                     rdev->pm.dpm_enabled) {
8818                         wm_low.yclk =
8819                                 radeon_dpm_get_mclk(rdev, true) * 10;
8820                         wm_low.sclk =
8821                                 radeon_dpm_get_sclk(rdev, true) * 10;
8822                 } else {
8823                         wm_low.yclk = rdev->pm.current_mclk * 10;
8824                         wm_low.sclk = rdev->pm.current_sclk * 10;
8825                 }
8826
8827                 wm_low.disp_clk = mode->clock;
8828                 wm_low.src_width = mode->crtc_hdisplay;
8829                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8830                 wm_low.blank_time = line_time - wm_low.active_time;
8831                 wm_low.interlaced = false;
8832                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8833                         wm_low.interlaced = true;
8834                 wm_low.vsc = radeon_crtc->vsc;
8835                 wm_low.vtaps = 1;
8836                 if (radeon_crtc->rmx_type != RMX_OFF)
8837                         wm_low.vtaps = 2;
8838                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8839                 wm_low.lb_size = lb_size;
8840                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8841                 wm_low.num_heads = num_heads;
8842
8843                 /* set for low clocks */
8844                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8845
8846                 /* possibly force display priority to high */
8847                 /* should really do this at mode validation time... */
8848                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8849                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8850                     !dce8_check_latency_hiding(&wm_low) ||
8851                     (rdev->disp_priority == 2)) {
8852                         DRM_DEBUG_KMS("force priority to high\n");
8853                 }
8854         }
8855
8856         /* select wm A */
8857         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8858         tmp = wm_mask;
8859         tmp &= ~LATENCY_WATERMARK_MASK(3);
8860         tmp |= LATENCY_WATERMARK_MASK(1);
8861         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8862         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8863                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8864                 LATENCY_HIGH_WATERMARK(line_time)));
8865         /* select wm B */
8866         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8867         tmp &= ~LATENCY_WATERMARK_MASK(3);
8868         tmp |= LATENCY_WATERMARK_MASK(2);
8869         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8870         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8871                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8872                 LATENCY_HIGH_WATERMARK(line_time)));
8873         /* restore original selection */
8874         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8875
8876         /* save values for DPM */
8877         radeon_crtc->line_time = line_time;
8878         radeon_crtc->wm_high = latency_watermark_a;
8879         radeon_crtc->wm_low = latency_watermark_b;
8880 }
8881
8882 /**
8883  * dce8_bandwidth_update - program display watermarks
8884  *
8885  * @rdev: radeon_device pointer
8886  *
8887  * Calculate and program the display watermarks and line
8888  * buffer allocation (CIK).
8889  */
8890 void dce8_bandwidth_update(struct radeon_device *rdev)
8891 {
8892         struct drm_display_mode *mode = NULL;
8893         u32 num_heads = 0, lb_size;
8894         int i;
8895
8896         radeon_update_display_priority(rdev);
8897
8898         for (i = 0; i < rdev->num_crtc; i++) {
8899                 if (rdev->mode_info.crtcs[i]->base.enabled)
8900                         num_heads++;
8901         }
8902         for (i = 0; i < rdev->num_crtc; i++) {
8903                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8904                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8905                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8906         }
8907 }
8908
8909 /**
8910  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8911  *
8912  * @rdev: radeon_device pointer
8913  *
8914  * Fetches a GPU clock counter snapshot (CIK).
8915  * Returns the 64 bit clock counter snapshot.
8916  */
8917 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8918 {
8919         uint64_t clock;
8920
8921         mutex_lock(&rdev->gpu_clock_mutex);
8922         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8923         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8924                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8925         mutex_unlock(&rdev->gpu_clock_mutex);
8926         return clock;
8927 }
8928
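     /**
      * cik_set_uvd_clock - program a single UVD clock
      *
      * @rdev: radeon_device pointer
      * @clock: requested clock frequency
      * @cntl_reg: clock control register to program
      * @status_reg: status register to poll for the clock to settle
      *
      * Looks up the post divider for the requested frequency via the
      * atom tables, programs it and waits for the clock to report
      * stable (CIK).
      * Returns 0 on success, error for failure.
      */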
8929 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8930                               u32 cntl_reg, u32 status_reg)
8931 {
8932         int r, i;
8933         struct atom_clock_dividers dividers;
8934         uint32_t tmp;
8935
8936         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8937                                            clock, false, &dividers);
8938         if (r)
8939                 return r;
8940
8941         tmp = RREG32_SMC(cntl_reg);
8942         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8943         tmp |= dividers.post_divider;
8944         WREG32_SMC(cntl_reg, tmp);
8945
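             /* wait up to ~1s (100 * 10ms) for the divider change to take effect */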
8946         for (i = 0; i < 100; i++) {
8947                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8948                         break;
8949                 mdelay(10);
8950         }
8951         if (i == 100)
8952                 return -ETIMEDOUT;
8953
8954         return 0;
8955 }
8956
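     /**
      * cik_set_uvd_clocks - set the UVD VCLK and DCLK
      *
      * @rdev: radeon_device pointer
      * @vclk: requested VCLK frequency
      * @dclk: requested DCLK frequency
      *
      * Programs the UVD VCLK and DCLK clocks (CIK).
      * Returns 0 on success, error for failure.
      */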
8957 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8958 {
8959         int r = 0;
8960
8961         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8962         if (r)
8963                 return r;
8964
8965         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8966         return r;
8967 }
8968
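     /**
      * cik_pcie_gen3_enable - enable higher PCIE link speeds
      *
      * @rdev: radeon_device pointer
      *
      * Switches the PCIE link to gen2 or gen3 speeds when both the GPU
      * and the upstream bridge support them, retrying link equalization
      * for gen3 if needed (CIK).  Skipped for IGPs, non-PCIE parts and
      * when disabled with radeon.pcie_gen2=0.
      */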
8969 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8970 {
8971         struct pci_dev *root = rdev->pdev->bus->self;
8972         int bridge_pos, gpu_pos;
8973         u32 speed_cntl, mask, current_data_rate;
8974         int ret, i;
8975         u16 tmp16;
8976
8977         if (radeon_pcie_gen2 == 0)
8978                 return;
8979
8980         if (rdev->flags & RADEON_IS_IGP)
8981                 return;
8982
8983         if (!(rdev->flags & RADEON_IS_PCIE))
8984                 return;
8985
8986         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8987         if (ret != 0)
8988                 return;
8989
8990         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8991                 return;
8992
8993         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8994         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8995                 LC_CURRENT_DATA_RATE_SHIFT;
8996         if (mask & DRM_PCIE_SPEED_80) {
8997                 if (current_data_rate == 2) {
8998                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8999                         return;
9000                 }
9001                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9002         } else if (mask & DRM_PCIE_SPEED_50) {
9003                 if (current_data_rate == 1) {
9004                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9005                         return;
9006                 }
9007                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9008         }
9009
9010         bridge_pos = pci_pcie_cap(root);
9011         if (!bridge_pos)
9012                 return;
9013
9014         gpu_pos = pci_pcie_cap(rdev->pdev);
9015         if (!gpu_pos)
9016                 return;
9017
9018         if (mask & DRM_PCIE_SPEED_80) {
9019                 /* re-try equalization if gen3 is not already enabled */
9020                 if (current_data_rate != 2) {
9021                         u16 bridge_cfg, gpu_cfg;
9022                         u16 bridge_cfg2, gpu_cfg2;
9023                         u32 max_lw, current_lw, tmp;
9024
9025                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9026                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9027
9028                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9029                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9030
9031                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9032                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9033
9034                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9035                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9036                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9037
9038                         if (current_lw < max_lw) {
9039                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9040                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9041                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9042                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9043                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9044                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9045                                 }
9046                         }
9047
9048                         for (i = 0; i < 10; i++) {
9049                                 /* check status */
9050                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9051                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9052                                         break;
9053
9054                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9055                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9056
9057                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9058                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9059
9060                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9061                                 tmp |= LC_SET_QUIESCE;
9062                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9063
9064                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9065                                 tmp |= LC_REDO_EQ;
9066                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9067
9068                                 mdelay(100);
9069
9070                                 /* linkctl */
9071                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9072                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9073                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9074                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9075
9076                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9077                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9078                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9079                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9080
9081                                 /* linkctl2 */
9082                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9083                                 tmp16 &= ~((1 << 4) | (7 << 9));
9084                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9085                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9086
9087                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9088                                 tmp16 &= ~((1 << 4) | (7 << 9));
9089                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9090                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9091
9092                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9093                                 tmp &= ~LC_SET_QUIESCE;
9094                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9095                         }
9096                 }
9097         }
9098
9099         /* set the link speed */
9100         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9101         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9102         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9103
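             /* program the target link speed in LNKCTL2 (bits 3:0): 1 = 2.5GT/s, 2 = 5.0GT/s, 3 = 8.0GT/s */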
9104         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9105         tmp16 &= ~0xf;
9106         if (mask & DRM_PCIE_SPEED_80)
9107                 tmp16 |= 3; /* gen3 */
9108         else if (mask & DRM_PCIE_SPEED_50)
9109                 tmp16 |= 2; /* gen2 */
9110         else
9111                 tmp16 |= 1; /* gen1 */
9112         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9113
9114         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9115         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9116         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9117
9118         for (i = 0; i < rdev->usec_timeout; i++) {
9119                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9120                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9121                         break;
9122                 udelay(1);
9123         }
9124 }
9125
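     /**
      * cik_program_aspm - program PCIE ASPM settings
      *
      * @rdev: radeon_device pointer
      *
      * Configures Active State Power Management for the PCIE link:
      * L0s/L1 inactivity timers, PLL power down in L1 and CLKREQ
      * support (CIK).  Skipped for IGPs, non-PCIE parts and when
      * disabled with radeon.aspm=0.
      */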
9126 static void cik_program_aspm(struct radeon_device *rdev)
9127 {
9128         u32 data, orig;
9129         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9130         bool disable_clkreq = false;
9131
9132         if (radeon_aspm == 0)
9133                 return;
9134
9135         /* XXX double check IGPs */
9136         if (rdev->flags & RADEON_IS_IGP)
9137                 return;
9138
9139         if (!(rdev->flags & RADEON_IS_PCIE))
9140                 return;
9141
9142         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9143         data &= ~LC_XMIT_N_FTS_MASK;
9144         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9145         if (orig != data)
9146                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9147
9148         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9149         data |= LC_GO_TO_RECOVERY;
9150         if (orig != data)
9151                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9152
9153         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9154         data |= P_IGNORE_EDB_ERR;
9155         if (orig != data)
9156                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9157
9158         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9159         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9160         data |= LC_PMI_TO_L1_DIS;
9161         if (!disable_l0s)
9162                 data |= LC_L0S_INACTIVITY(7);
9163
9164         if (!disable_l1) {
9165                 data |= LC_L1_INACTIVITY(7);
9166                 data &= ~LC_PMI_TO_L1_DIS;
9167                 if (orig != data)
9168                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9169
9170                 if (!disable_plloff_in_l1) {
9171                         bool clk_req_support;
9172
9173                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9174                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9175                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9176                         if (orig != data)
9177                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9178
9179                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9180                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9181                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9182                         if (orig != data)
9183                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9184
9185                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9186                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9187                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9188                         if (orig != data)
9189                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9190
9191                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9192                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9193                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9194                         if (orig != data)
9195                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9196
9197                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9198                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9199                         data |= LC_DYN_LANES_PWR_STATE(3);
9200                         if (orig != data)
9201                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9202
9203                         if (!disable_clkreq) {
9204                                 struct pci_dev *root = rdev->pdev->bus->self;
9205                                 u32 lnkcap;
9206
9207                                 clk_req_support = false;
9208                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9209                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9210                                         clk_req_support = true;
9211                         } else {
9212                                 clk_req_support = false;
9213                         }
9214
9215                         if (clk_req_support) {
9216                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9217                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9218                                 if (orig != data)
9219                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9220
9221                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9222                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9223                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9224                                 if (orig != data)
9225                                         WREG32_SMC(THM_CLK_CNTL, data);
9226
9227                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9228                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9229                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9230                                 if (orig != data)
9231                                         WREG32_SMC(MISC_CLK_CTRL, data);
9232
9233                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9234                                 data &= ~BCLK_AS_XCLK;
9235                                 if (orig != data)
9236                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9237
9238                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9239                                 data &= ~FORCE_BIF_REFCLK_EN;
9240                                 if (orig != data)
9241                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9242
9243                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9244                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9245                                 data |= MPLL_CLKOUT_SEL(4);
9246                                 if (orig != data)
9247                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9248                         }
9249                 }
9250         } else {
9251                 if (orig != data)
9252                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9253         }
9254
9255         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9256         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9257         if (orig != data)
9258                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9259
9260         if (!disable_l0s) {
9261                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9262                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9263                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9264                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9265                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9266                                 data &= ~LC_L0S_INACTIVITY_MASK;
9267                                 if (orig != data)
9268                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9269                         }
9270                 }
9271         }
9272 }