Merge tag 'drm-misc-next-2019-12-16' of git://anongit.freedesktop.org/drm/drm-misc...
[platform/kernel/linux-starfive.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include <linux/slab.h>
29
30 #include <drm/drm_vblank.h>
31
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "cikd.h"
35 #include "clearstate_ci.h"
36 #include "radeon.h"
37 #include "radeon_asic.h"
38 #include "radeon_audio.h"
39 #include "radeon_ucode.h"
40
41 #define SH_MEM_CONFIG_GFX_DEFAULT \
42         ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
43
44 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
47 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
49 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
53
54 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
55 MODULE_FIRMWARE("radeon/bonaire_me.bin");
56 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
57 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
58 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
59 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
60 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
61 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
62 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
63
64 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
67 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
68 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
69 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
70 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
71 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
72 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
73
74 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
75 MODULE_FIRMWARE("radeon/hawaii_me.bin");
76 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
77 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
78 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
79 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
80 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
81 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
82 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
83
84 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
85 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
86 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
87 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
88 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
89 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
90
91 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
92 MODULE_FIRMWARE("radeon/kaveri_me.bin");
93 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
94 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
95 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
96 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
97 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
98
99 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
100 MODULE_FIRMWARE("radeon/KABINI_me.bin");
101 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
102 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
103 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
104 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
105
106 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
107 MODULE_FIRMWARE("radeon/kabini_me.bin");
108 MODULE_FIRMWARE("radeon/kabini_ce.bin");
109 MODULE_FIRMWARE("radeon/kabini_mec.bin");
110 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
111 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
112
113 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
114 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
115 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
116 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
117 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
118 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
119
120 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
121 MODULE_FIRMWARE("radeon/mullins_me.bin");
122 MODULE_FIRMWARE("radeon/mullins_ce.bin");
123 MODULE_FIRMWARE("radeon/mullins_mec.bin");
124 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
125 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
126
127 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
128 extern void r600_ih_ring_fini(struct radeon_device *rdev);
129 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
130 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
131 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
132 extern void sumo_rlc_fini(struct radeon_device *rdev);
133 extern int sumo_rlc_init(struct radeon_device *rdev);
134 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
135 extern void si_rlc_reset(struct radeon_device *rdev);
136 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
137 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
138 extern int cik_sdma_resume(struct radeon_device *rdev);
139 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
140 extern void cik_sdma_fini(struct radeon_device *rdev);
141 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
142 static void cik_rlc_stop(struct radeon_device *rdev);
143 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
144 static void cik_program_aspm(struct radeon_device *rdev);
145 static void cik_init_pg(struct radeon_device *rdev);
146 static void cik_init_cg(struct radeon_device *rdev);
147 static void cik_fini_pg(struct radeon_device *rdev);
148 static void cik_fini_cg(struct radeon_device *rdev);
149 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
150                                           bool enable);
151
152 /**
153  * cik_get_allowed_info_register - fetch the register for the info ioctl
154  *
155  * @rdev: radeon_device pointer
156  * @reg: register offset in bytes
157  * @val: register value
158  *
159  * Returns 0 for success or -EINVAL for an invalid register
160  *
161  */
162 int cik_get_allowed_info_register(struct radeon_device *rdev,
163                                   u32 reg, u32 *val)
164 {
165         switch (reg) {
166         case GRBM_STATUS:
167         case GRBM_STATUS2:
168         case GRBM_STATUS_SE0:
169         case GRBM_STATUS_SE1:
170         case GRBM_STATUS_SE2:
171         case GRBM_STATUS_SE3:
172         case SRBM_STATUS:
173         case SRBM_STATUS2:
174         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
175         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
176         case UVD_STATUS:
177         /* TODO VCE */
178                 *val = RREG32(reg);
179                 return 0;
180         default:
181                 return -EINVAL;
182         }
183 }
184
185 /*
186  * Indirect registers accessor
187  */
188 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
189 {
190         unsigned long flags;
191         u32 r;
192
193         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
194         WREG32(CIK_DIDT_IND_INDEX, (reg));
195         r = RREG32(CIK_DIDT_IND_DATA);
196         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
197         return r;
198 }
199
200 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
201 {
202         unsigned long flags;
203
204         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
205         WREG32(CIK_DIDT_IND_INDEX, (reg));
206         WREG32(CIK_DIDT_IND_DATA, (v));
207         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
208 }
209
210 /* get temperature in millidegrees */
211 int ci_get_temp(struct radeon_device *rdev)
212 {
213         u32 temp;
214         int actual_temp = 0;
215
216         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
217                 CTF_TEMP_SHIFT;
218
219         if (temp & 0x200)
220                 actual_temp = 255;
221         else
222                 actual_temp = temp & 0x1ff;
223
224         return actual_temp * 1000;
225 }
226
227 /* get temperature in millidegrees */
228 int kv_get_temp(struct radeon_device *rdev)
229 {
230         u32 temp;
231         int actual_temp = 0;
232
233         temp = RREG32_SMC(0xC0300E0C);
234
235         if (temp)
236                 actual_temp = (temp / 8) - 49;
237         else
238                 actual_temp = 0;
239
240         return actual_temp * 1000;
241 }
242
243 /*
244  * Indirect registers accessor
245  */
246 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
247 {
248         unsigned long flags;
249         u32 r;
250
251         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
252         WREG32(PCIE_INDEX, reg);
253         (void)RREG32(PCIE_INDEX);
254         r = RREG32(PCIE_DATA);
255         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
256         return r;
257 }
258
259 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
260 {
261         unsigned long flags;
262
263         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
264         WREG32(PCIE_INDEX, reg);
265         (void)RREG32(PCIE_INDEX);
266         WREG32(PCIE_DATA, v);
267         (void)RREG32(PCIE_DATA);
268         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
269 }
270
/* RLC save/restore register list for Spectre (Kaveri) parts, consumed by
 * the RLC microcode (see sumo_rlc_init/cik_rlc users elsewhere in this
 * driver).  Entries appear to come in pairs:
 *   (GRBM_GFX_INDEX-style selector << 16) | (register byte offset >> 2)
 * followed by a 0x00000000 placeholder dword for the saved value; the
 * bare 0x3 and 0x5 literals look like section markers/counts for the
 * entries that follow.  NOTE(review): format inferred from the data
 * layout -- confirm against the CIK RLC microcode documentation before
 * relying on it.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
717
718 static const u32 kalindi_rlc_save_restore_register_list[] =
719 {
720         (0x0e00 << 16) | (0xc12c >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc140 >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc150 >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc15c >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc168 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc170 >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc204 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc2b4 >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc2b8 >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0xc2bc >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0xc2c0 >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x8228 >> 2),
743         0x00000000,
744         (0x0e00 << 16) | (0x829c >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0x869c >> 2),
747         0x00000000,
748         (0x0600 << 16) | (0x98f4 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0x98f8 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x9900 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0xc260 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x90e8 >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x3c000 >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x3c00c >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0x8c1c >> 2),
763         0x00000000,
764         (0x0e00 << 16) | (0x9700 >> 2),
765         0x00000000,
766         (0x0e00 << 16) | (0xcd20 >> 2),
767         0x00000000,
768         (0x4e00 << 16) | (0xcd20 >> 2),
769         0x00000000,
770         (0x5e00 << 16) | (0xcd20 >> 2),
771         0x00000000,
772         (0x6e00 << 16) | (0xcd20 >> 2),
773         0x00000000,
774         (0x7e00 << 16) | (0xcd20 >> 2),
775         0x00000000,
776         (0x0e00 << 16) | (0x89bc >> 2),
777         0x00000000,
778         (0x0e00 << 16) | (0x8900 >> 2),
779         0x00000000,
780         0x3,
781         (0x0e00 << 16) | (0xc130 >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc134 >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc1fc >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc208 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc264 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc268 >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc26c >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0xc270 >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0xc274 >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0xc28c >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0xc290 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0xc294 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xc298 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xc2a0 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0xc2a4 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0xc2a8 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0xc2ac >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0x301d0 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x30238 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x30250 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0x30254 >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0x30258 >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0x3025c >> 2),
826         0x00000000,
827         (0x4e00 << 16) | (0xc900 >> 2),
828         0x00000000,
829         (0x5e00 << 16) | (0xc900 >> 2),
830         0x00000000,
831         (0x6e00 << 16) | (0xc900 >> 2),
832         0x00000000,
833         (0x7e00 << 16) | (0xc900 >> 2),
834         0x00000000,
835         (0x4e00 << 16) | (0xc904 >> 2),
836         0x00000000,
837         (0x5e00 << 16) | (0xc904 >> 2),
838         0x00000000,
839         (0x6e00 << 16) | (0xc904 >> 2),
840         0x00000000,
841         (0x7e00 << 16) | (0xc904 >> 2),
842         0x00000000,
843         (0x4e00 << 16) | (0xc908 >> 2),
844         0x00000000,
845         (0x5e00 << 16) | (0xc908 >> 2),
846         0x00000000,
847         (0x6e00 << 16) | (0xc908 >> 2),
848         0x00000000,
849         (0x7e00 << 16) | (0xc908 >> 2),
850         0x00000000,
851         (0x4e00 << 16) | (0xc90c >> 2),
852         0x00000000,
853         (0x5e00 << 16) | (0xc90c >> 2),
854         0x00000000,
855         (0x6e00 << 16) | (0xc90c >> 2),
856         0x00000000,
857         (0x7e00 << 16) | (0xc90c >> 2),
858         0x00000000,
859         (0x4e00 << 16) | (0xc910 >> 2),
860         0x00000000,
861         (0x5e00 << 16) | (0xc910 >> 2),
862         0x00000000,
863         (0x6e00 << 16) | (0xc910 >> 2),
864         0x00000000,
865         (0x7e00 << 16) | (0xc910 >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0xc99c >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0x9834 >> 2),
870         0x00000000,
871         (0x0000 << 16) | (0x30f00 >> 2),
872         0x00000000,
873         (0x0000 << 16) | (0x30f04 >> 2),
874         0x00000000,
875         (0x0000 << 16) | (0x30f08 >> 2),
876         0x00000000,
877         (0x0000 << 16) | (0x30f0c >> 2),
878         0x00000000,
879         (0x0600 << 16) | (0x9b7c >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x8a14 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x8a18 >> 2),
884         0x00000000,
885         (0x0600 << 16) | (0x30a00 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x8bf0 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x8bcc >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x8b24 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0x30a04 >> 2),
894         0x00000000,
895         (0x0600 << 16) | (0x30a10 >> 2),
896         0x00000000,
897         (0x0600 << 16) | (0x30a14 >> 2),
898         0x00000000,
899         (0x0600 << 16) | (0x30a18 >> 2),
900         0x00000000,
901         (0x0600 << 16) | (0x30a2c >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0xc700 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0xc704 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0xc708 >> 2),
908         0x00000000,
909         (0x0e00 << 16) | (0xc768 >> 2),
910         0x00000000,
911         (0x0400 << 16) | (0xc770 >> 2),
912         0x00000000,
913         (0x0400 << 16) | (0xc774 >> 2),
914         0x00000000,
915         (0x0400 << 16) | (0xc798 >> 2),
916         0x00000000,
917         (0x0400 << 16) | (0xc79c >> 2),
918         0x00000000,
919         (0x0e00 << 16) | (0x9100 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0x3c010 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0x8c00 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0x8c04 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x8c20 >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0x8c38 >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0x8c3c >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0xae00 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0x9604 >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0xac08 >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0xac0c >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0xac10 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0xac14 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0xac58 >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0xac68 >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0xac6c >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0xac70 >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0xac74 >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0xac78 >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0xac7c >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0xac80 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0xac84 >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0xac88 >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0xac8c >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x970c >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x9714 >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0x9718 >> 2),
972         0x00000000,
973         (0x0e00 << 16) | (0x971c >> 2),
974         0x00000000,
975         (0x0e00 << 16) | (0x31068 >> 2),
976         0x00000000,
977         (0x4e00 << 16) | (0x31068 >> 2),
978         0x00000000,
979         (0x5e00 << 16) | (0x31068 >> 2),
980         0x00000000,
981         (0x6e00 << 16) | (0x31068 >> 2),
982         0x00000000,
983         (0x7e00 << 16) | (0x31068 >> 2),
984         0x00000000,
985         (0x0e00 << 16) | (0xcd10 >> 2),
986         0x00000000,
987         (0x0e00 << 16) | (0xcd14 >> 2),
988         0x00000000,
989         (0x0e00 << 16) | (0x88b0 >> 2),
990         0x00000000,
991         (0x0e00 << 16) | (0x88b4 >> 2),
992         0x00000000,
993         (0x0e00 << 16) | (0x88b8 >> 2),
994         0x00000000,
995         (0x0e00 << 16) | (0x88bc >> 2),
996         0x00000000,
997         (0x0400 << 16) | (0x89c0 >> 2),
998         0x00000000,
999         (0x0e00 << 16) | (0x88c4 >> 2),
1000         0x00000000,
1001         (0x0e00 << 16) | (0x88c8 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x88d0 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x88d4 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x88d8 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x8980 >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x30938 >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x3093c >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x30940 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x89a0 >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x30900 >> 2),
1020         0x00000000,
1021         (0x0e00 << 16) | (0x30904 >> 2),
1022         0x00000000,
1023         (0x0e00 << 16) | (0x89b4 >> 2),
1024         0x00000000,
1025         (0x0e00 << 16) | (0x3e1fc >> 2),
1026         0x00000000,
1027         (0x0e00 << 16) | (0x3c210 >> 2),
1028         0x00000000,
1029         (0x0e00 << 16) | (0x3c214 >> 2),
1030         0x00000000,
1031         (0x0e00 << 16) | (0x3c218 >> 2),
1032         0x00000000,
1033         (0x0e00 << 16) | (0x8904 >> 2),
1034         0x00000000,
1035         0x5,
1036         (0x0e00 << 16) | (0x8c28 >> 2),
1037         (0x0e00 << 16) | (0x8c2c >> 2),
1038         (0x0e00 << 16) | (0x8c30 >> 2),
1039         (0x0e00 << 16) | (0x8c34 >> 2),
1040         (0x0e00 << 16) | (0x9600 >> 2),
1041 };
1042
/*
 * Bonaire "golden" SPM register settings.
 * Entries are {register offset, mask, value} triples, applied by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1047
/*
 * Bonaire "golden" settings common across SKUs: {offset, mask, value}
 * triples applied via radeon_program_register_sequence().
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1055
/*
 * Bonaire "golden" register overrides (vendor-recommended power-on
 * values): {register offset, mask, value} triples applied via
 * radeon_program_register_sequence() in cik_init_golden_registers().
 * Values are hardware-validated magic numbers; do not edit by hand.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1100
/*
 * Bonaire clock-gating init table (per the name, MGCG/CGCG — medium/
 * coarse grain clock gating): {offset, mask, value} triples applied
 * via radeon_program_register_sequence(). Hardware-validated values;
 * do not edit by hand.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1186
/*
 * Spectre (Kaveri) "golden" SPM register settings: {offset, mask,
 * value} triples applied via radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1191
/*
 * Spectre (Kaveri) "golden" settings common across SKUs:
 * {offset, mask, value} triples applied via
 * radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1199
/*
 * Spectre (Kaveri) "golden" register overrides: {offset, mask, value}
 * triples applied via radeon_program_register_sequence() in
 * cik_init_golden_registers(). Hardware-validated values; do not edit
 * by hand.
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1228
/*
 * Spectre (Kaveri) clock-gating init table (per the name, MGCG/CGCG):
 * {offset, mask, value} triples applied via
 * radeon_program_register_sequence(). Hardware-validated values; do
 * not edit by hand.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1319
/*
 * Kalindi (Kabini; also reused for Mullins) "golden" SPM register
 * settings: {offset, mask, value} triples applied via
 * radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1324
/*
 * Kalindi (Kabini; also reused for Mullins) "golden" settings common
 * across SKUs: {offset, mask, value} triples applied via
 * radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1332
/*
 * Kalindi (Kabini) "golden" register overrides: {offset, mask, value}
 * triples applied via radeon_program_register_sequence() in
 * cik_init_golden_registers(). Hardware-validated values; do not edit
 * by hand.
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1366
/*
 * Kalindi (Kabini; also reused for Mullins) clock-gating init table
 * (per the name, MGCG/CGCG): {offset, mask, value} triples applied via
 * radeon_program_register_sequence(). Hardware-validated values; do
 * not edit by hand.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1425
/*
 * Hawaii "golden" SPM register settings: {offset, mask, value} triples
 * applied via radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1430
/*
 * Hawaii "golden" settings common across SKUs: {offset, mask, value}
 * triples applied via radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1439
/*
 * Hawaii "golden" register overrides: {offset, mask, value} triples
 * applied via radeon_program_register_sequence() in
 * cik_init_golden_registers(). Hardware-validated values; do not edit
 * by hand.
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1479
/*
 * Hawaii clock-gating init table (per the name, MGCG/CGCG): {offset,
 * mask, value} triples applied via radeon_program_register_sequence().
 * Note the first entry intentionally differs from the other CIK parts
 * (0xfffffffd vs 0xfffffffc). Hardware-validated values; do not edit
 * by hand.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1590
/*
 * Godavari (Mullins) "golden" register overrides: {offset, mask,
 * value} triples applied via radeon_program_register_sequence() in
 * cik_init_golden_registers(). Hardware-validated values; do not edit
 * by hand.
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/*
	 * NOTE(review): 0x98302 is not dword-aligned, unlike every other
	 * offset in these tables, and the kalindi/bonaire/hawaii tables
	 * all program 0x9834 with this same mask/value — this looks like
	 * a typo for 0x9834. Confirm against the register docs before
	 * changing; left as-is to avoid altering hardware behavior.
	 */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1626
1627
1628 static void cik_init_golden_registers(struct radeon_device *rdev)
1629 {
1630         switch (rdev->family) {
1631         case CHIP_BONAIRE:
1632                 radeon_program_register_sequence(rdev,
1633                                                  bonaire_mgcg_cgcg_init,
1634                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1635                 radeon_program_register_sequence(rdev,
1636                                                  bonaire_golden_registers,
1637                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1638                 radeon_program_register_sequence(rdev,
1639                                                  bonaire_golden_common_registers,
1640                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1641                 radeon_program_register_sequence(rdev,
1642                                                  bonaire_golden_spm_registers,
1643                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1644                 break;
1645         case CHIP_KABINI:
1646                 radeon_program_register_sequence(rdev,
1647                                                  kalindi_mgcg_cgcg_init,
1648                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1649                 radeon_program_register_sequence(rdev,
1650                                                  kalindi_golden_registers,
1651                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1652                 radeon_program_register_sequence(rdev,
1653                                                  kalindi_golden_common_registers,
1654                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1655                 radeon_program_register_sequence(rdev,
1656                                                  kalindi_golden_spm_registers,
1657                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1658                 break;
1659         case CHIP_MULLINS:
1660                 radeon_program_register_sequence(rdev,
1661                                                  kalindi_mgcg_cgcg_init,
1662                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1663                 radeon_program_register_sequence(rdev,
1664                                                  godavari_golden_registers,
1665                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1666                 radeon_program_register_sequence(rdev,
1667                                                  kalindi_golden_common_registers,
1668                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1669                 radeon_program_register_sequence(rdev,
1670                                                  kalindi_golden_spm_registers,
1671                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1672                 break;
1673         case CHIP_KAVERI:
1674                 radeon_program_register_sequence(rdev,
1675                                                  spectre_mgcg_cgcg_init,
1676                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1677                 radeon_program_register_sequence(rdev,
1678                                                  spectre_golden_registers,
1679                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1680                 radeon_program_register_sequence(rdev,
1681                                                  spectre_golden_common_registers,
1682                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1683                 radeon_program_register_sequence(rdev,
1684                                                  spectre_golden_spm_registers,
1685                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1686                 break;
1687         case CHIP_HAWAII:
1688                 radeon_program_register_sequence(rdev,
1689                                                  hawaii_mgcg_cgcg_init,
1690                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1691                 radeon_program_register_sequence(rdev,
1692                                                  hawaii_golden_registers,
1693                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1694                 radeon_program_register_sequence(rdev,
1695                                                  hawaii_golden_common_registers,
1696                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1697                 radeon_program_register_sequence(rdev,
1698                                                  hawaii_golden_spm_registers,
1699                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1700                 break;
1701         default:
1702                 break;
1703         }
1704 }
1705
1706 /**
1707  * cik_get_xclk - get the xclk
1708  *
1709  * @rdev: radeon_device pointer
1710  *
1711  * Returns the reference clock used by the gfx engine
1712  * (CIK).
1713  */
1714 u32 cik_get_xclk(struct radeon_device *rdev)
1715 {
1716         u32 reference_clock = rdev->clock.spll.reference_freq;
1717
1718         if (rdev->flags & RADEON_IS_IGP) {
1719                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1720                         return reference_clock / 2;
1721         } else {
1722                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1723                         return reference_clock / 4;
1724         }
1725         return reference_clock;
1726 }
1727
1728 /**
1729  * cik_mm_rdoorbell - read a doorbell dword
1730  *
1731  * @rdev: radeon_device pointer
1732  * @index: doorbell index
1733  *
1734  * Returns the value in the doorbell aperture at the
1735  * requested doorbell index (CIK).
1736  */
1737 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1738 {
1739         if (index < rdev->doorbell.num_doorbells) {
1740                 return readl(rdev->doorbell.ptr + index);
1741         } else {
1742                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1743                 return 0;
1744         }
1745 }
1746
1747 /**
1748  * cik_mm_wdoorbell - write a doorbell dword
1749  *
1750  * @rdev: radeon_device pointer
1751  * @index: doorbell index
1752  * @v: value to write
1753  *
1754  * Writes @v to the doorbell aperture at the
1755  * requested doorbell index (CIK).
1756  */
1757 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1758 {
1759         if (index < rdev->doorbell.num_doorbells) {
1760                 writel(v, rdev->doorbell.ptr + index);
1761         } else {
1762                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1763         }
1764 }
1765
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC_SEQ_IO_DEBUG (index, value) pairs written via
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA before loading legacy
 * (non-header) MC ucode in ci_mc_load_microcode().
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1807
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC_SEQ_IO_DEBUG (index, value) pairs written via
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA before loading legacy
 * (non-header) MC ucode in ci_mc_load_microcode().
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1835
1836
1837 /**
1838  * cik_srbm_select - select specific register instances
1839  *
1840  * @rdev: radeon_device pointer
1841  * @me: selected ME (micro engine)
1842  * @pipe: pipe
1843  * @queue: queue
1844  * @vmid: VMID
1845  *
1846  * Switches the currently active registers instances.  Some
1847  * registers are instanced per VMID, others are instanced per
1848  * me/pipe/queue combination.
1849  */
1850 static void cik_srbm_select(struct radeon_device *rdev,
1851                             u32 me, u32 pipe, u32 queue, u32 vmid)
1852 {
1853         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1854                              MEID(me & 0x3) |
1855                              VMID(vmid & 0xf) |
1856                              QUEUEID(queue & 0x7));
1857         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1858 }
1859
1860 /* ucode loading */
1861 /**
1862  * ci_mc_load_microcode - load MC ucode into the hw
1863  *
1864  * @rdev: radeon_device pointer
1865  *
1866  * Load the GDDR MC ucode into the hw (CIK).
1867  * Returns 0 on success, error on failure.
1868  */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* new-format firmware: sizes and array offsets come from the
		 * image's own header rather than the built-in tables below.
		 */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* each io-debug entry is a pair of dwords (index, data) */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy firmware: whole blob is ucode (big-endian dwords),
		 * io-debug values come from the per-family static tables.
		 */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	/* only load ucode if the MC sequencer engine is not already running */
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		tmp = RREG32(MC_SEQ_IO_DEBUG_INDEX);
		tmp = RREG32(MC_SEQ_MISC0);
		/* extra io-debug writes for PCI device 0x6649 with this
		 * MC_SEQ_MISC0 signature -- NOTE(review): presumably a
		 * board-specific memory-tuning quirk; confirm against AMD docs.
		 */
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete on both channels; a timeout
		 * here is not treated as an error.
		 */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1965
1966 /**
1967  * cik_init_microcode - load ucode images from disk
1968  *
1969  * @rdev: radeon_device pointer
1970  *
1971  * Use the firmware interface to load the ucode images into
1972  * the driver (not loaded into hw).
1973  * Returns 0 on success, error on failure.
1974  */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *new_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size = 0,
		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
	char fw_name[30];
	int new_fw = 0;	/* count of images that validated as new-format */
	int err;
	int num_fw;	/* number of images expected for this family */
	bool new_smc = false;

	DRM_DEBUG("\n");

	/* Per-family firmware base names (legacy uppercase vs. new lowercase),
	 * expected legacy image sizes, and how many images the family needs.
	 */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		/* these revisions/devices need the updated "_k_" SMC image */
		if ((rdev->pdev->revision == 0x80) ||
		    (rdev->pdev->revision == 0x81) ||
		    (rdev->pdev->device == 0x665f))
			new_smc = true;
		new_chip_name = "bonaire";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_HAWAII:
		chip_name = "HAWAII";
		if (rdev->pdev->revision == 0x80)
			new_smc = true;
		new_chip_name = "hawaii";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_KAVERI:
		/* APUs (Kaveri/Kabini/Mullins) have no MC or SMC firmware */
		chip_name = "KAVERI";
		new_chip_name = "kaveri";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 7;	/* includes the second MEC image */
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		new_chip_name = "kabini";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	case CHIP_MULLINS:
		chip_name = "MULLINS";
		new_chip_name = "mullins";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

	/* Each image below is requested by its new (lowercase) name first and
	 * validated; on failure we fall back to the legacy (uppercase) name
	 * and only check its size.  Note the legacy size-mismatch paths differ:
	 * the PFP path bails out immediately, the others record -EINVAL and
	 * keep going.
	 */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->pfp_fw->size != pfp_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->pfp_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->pfp_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->me_fw->size != me_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->me_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->ce_fw->size != ce_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->ce_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mec_fw->size != mec_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mec_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->mec_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* only Kaveri has a second MEC image; it exists only in the new
	 * firmware layout, so there is no legacy fallback here.
	 */
	if (rdev->family == CHIP_KAVERI) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
		if (err) {
			goto out;
		} else {
			err = radeon_ucode_validate(rdev->mec2_fw);
			if (err) {
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->rlc_fw->size != rlc_req_size) {
			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->rlc_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->sdma_fw->size != sdma_req_size) {
			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
			       rdev->sdma_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->sdma_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		/* legacy fallback tries the MC2 image first, then MC; the
		 * size check accepts either expected length.
		 */
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
			if (err) {
				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
				if (err)
					goto out;
			}
			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)){
				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->mc_fw->size, fw_name);
				err = -EINVAL;
			}
			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
		} else {
			err = radeon_ucode_validate(rdev->mc_fw);
			if (err) {
				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}

		/* SMC firmware is optional: a failed legacy load is logged
		 * and cleared, and err is reset so init can continue.
		 */
		if (new_smc)
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
			if (err) {
				pr_err("smc: error loading firmware \"%s\"\n",
				       fw_name);
				release_firmware(rdev->smc_fw);
				rdev->smc_fw = NULL;
				err = 0;
			} else if (rdev->smc_fw->size != smc_req_size) {
				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->smc_fw->size, fw_name);
				err = -EINVAL;
			}
		} else {
			err = radeon_ucode_validate(rdev->smc_fw);
			if (err) {
				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* all images must be the same format: either all legacy (new_fw == 0)
	 * or all new (new_fw == num_fw); a mix is an error.
	 */
	if (new_fw == 0) {
		rdev->new_fw = false;
	} else if (new_fw < num_fw) {
		pr_err("ci_fw: mixing new and old firmware!\n");
		err = -EINVAL;
	} else {
		rdev->new_fw = true;
	}

out:
	/* on any error, drop every firmware reference taken so far */
	if (err) {
		if (err != -EINVAL)
			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->mec2_fw);
		rdev->mec2_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
2314
2315 /*
2316  * Core functions
2317  */
2318 /**
2319  * cik_tiling_mode_table_init - init the hw tiling table
2320  *
2321  * @rdev: radeon_device pointer
2322  *
2323  * Starting with SI, the tiling setup is done globally in a
2324  * set of 32 tiling modes.  Rather than selecting each set of
2325  * parameters per surface as on older asics, we just select
2326  * which index in the tiling table we want to use, and the
2327  * surface uses those parameters (CIK).
2328  */
2329 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2330 {
2331         u32 *tile = rdev->config.cik.tile_mode_array;
2332         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2333         const u32 num_tile_mode_states =
2334                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2335         const u32 num_secondary_tile_mode_states =
2336                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2337         u32 reg_offset, split_equal_to_row_size;
2338         u32 num_pipe_configs;
2339         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2340                 rdev->config.cik.max_shader_engines;
2341
2342         switch (rdev->config.cik.mem_row_size_in_kb) {
2343         case 1:
2344                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2345                 break;
2346         case 2:
2347         default:
2348                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2349                 break;
2350         case 4:
2351                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2352                 break;
2353         }
2354
2355         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2356         if (num_pipe_configs > 8)
2357                 num_pipe_configs = 16;
2358
2359         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2360                 tile[reg_offset] = 0;
2361         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2362                 macrotile[reg_offset] = 0;
2363
2364         switch(num_pipe_configs) {
2365         case 16:
2366                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2367                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2368                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2370                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2374                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2378                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2382                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385                            TILE_SPLIT(split_equal_to_row_size));
2386                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2387                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2389                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2390                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2393                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                            TILE_SPLIT(split_equal_to_row_size));
2397                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2398                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2399                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2400                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2402                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2404                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2407                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2409                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2411                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2417                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2419                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2424                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2426                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2432                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444
2445                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448                            NUM_BANKS(ADDR_SURF_16_BANK));
2449                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                            NUM_BANKS(ADDR_SURF_16_BANK));
2453                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456                            NUM_BANKS(ADDR_SURF_16_BANK));
2457                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                            NUM_BANKS(ADDR_SURF_16_BANK));
2461                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464                            NUM_BANKS(ADDR_SURF_8_BANK));
2465                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                            NUM_BANKS(ADDR_SURF_4_BANK));
2469                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472                            NUM_BANKS(ADDR_SURF_2_BANK));
2473                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476                            NUM_BANKS(ADDR_SURF_16_BANK));
2477                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480                            NUM_BANKS(ADDR_SURF_16_BANK));
2481                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484                             NUM_BANKS(ADDR_SURF_16_BANK));
2485                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488                             NUM_BANKS(ADDR_SURF_8_BANK));
2489                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492                             NUM_BANKS(ADDR_SURF_4_BANK));
2493                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496                             NUM_BANKS(ADDR_SURF_2_BANK));
2497                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500                             NUM_BANKS(ADDR_SURF_2_BANK));
2501
2502                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2503                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2504                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2505                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2506                 break;
2507
2508         case 8:
2509                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2511                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2513                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2517                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2521                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2525                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528                            TILE_SPLIT(split_equal_to_row_size));
2529                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2533                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2536                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                            TILE_SPLIT(split_equal_to_row_size));
2540                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2541                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2542                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2545                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2547                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2550                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2552                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2554                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2560                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2562                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2565                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2567                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2569                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2573                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2575                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2577                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2584                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587
2588                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2590                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2591                                 NUM_BANKS(ADDR_SURF_16_BANK));
2592                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2595                                 NUM_BANKS(ADDR_SURF_16_BANK));
2596                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2598                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599                                 NUM_BANKS(ADDR_SURF_16_BANK));
2600                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603                                 NUM_BANKS(ADDR_SURF_16_BANK));
2604                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2607                                 NUM_BANKS(ADDR_SURF_8_BANK));
2608                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611                                 NUM_BANKS(ADDR_SURF_4_BANK));
2612                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615                                 NUM_BANKS(ADDR_SURF_2_BANK));
2616                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2618                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2619                                 NUM_BANKS(ADDR_SURF_16_BANK));
2620                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2622                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623                                 NUM_BANKS(ADDR_SURF_16_BANK));
2624                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2626                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2627                                 NUM_BANKS(ADDR_SURF_16_BANK));
2628                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631                                 NUM_BANKS(ADDR_SURF_16_BANK));
2632                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2635                                 NUM_BANKS(ADDR_SURF_8_BANK));
2636                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639                                 NUM_BANKS(ADDR_SURF_4_BANK));
2640                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643                                 NUM_BANKS(ADDR_SURF_2_BANK));
2644
2645                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2646                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2647                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2648                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2649                 break;
2650
2651         case 4:
2652                 if (num_rbs == 4) {
2653                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2655                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2656                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2657                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2661                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2665                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2669                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672                            TILE_SPLIT(split_equal_to_row_size));
2673                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2676                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2677                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2680                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                            TILE_SPLIT(split_equal_to_row_size));
2684                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2685                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2686                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2687                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2689                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2691                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2696                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2698                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2704                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2706                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2708                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2711                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2713                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2717                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2719                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2720                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2721                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2726                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2728                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731
2732                 } else if (num_rbs < 4) {
2733                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2734                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2735                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2736                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2737                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2741                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2745                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2749                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752                            TILE_SPLIT(split_equal_to_row_size));
2753                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2754                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2756                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2757                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2760                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                            TILE_SPLIT(split_equal_to_row_size));
2764                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2765                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2766                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2767                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2769                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2771                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2773                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2774                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2778                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2784                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2786                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2788                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2789                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2793                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2797                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2799                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2800                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2801                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2808                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                 }
2812
2813                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816                                 NUM_BANKS(ADDR_SURF_16_BANK));
2817                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2819                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820                                 NUM_BANKS(ADDR_SURF_16_BANK));
2821                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2823                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2824                                 NUM_BANKS(ADDR_SURF_16_BANK));
2825                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828                                 NUM_BANKS(ADDR_SURF_16_BANK));
2829                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832                                 NUM_BANKS(ADDR_SURF_16_BANK));
2833                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836                                 NUM_BANKS(ADDR_SURF_8_BANK));
2837                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2840                                 NUM_BANKS(ADDR_SURF_4_BANK));
2841                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2842                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2843                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2844                                 NUM_BANKS(ADDR_SURF_16_BANK));
2845                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2847                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848                                 NUM_BANKS(ADDR_SURF_16_BANK));
2849                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852                                 NUM_BANKS(ADDR_SURF_16_BANK));
2853                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2855                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856                                 NUM_BANKS(ADDR_SURF_16_BANK));
2857                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2860                                 NUM_BANKS(ADDR_SURF_16_BANK));
2861                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864                                 NUM_BANKS(ADDR_SURF_8_BANK));
2865                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2868                                 NUM_BANKS(ADDR_SURF_4_BANK));
2869
2870                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2871                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2872                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2873                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2874                 break;
2875
2876         case 2:
2877                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2879                            PIPE_CONFIG(ADDR_SURF_P2) |
2880                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2881                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883                            PIPE_CONFIG(ADDR_SURF_P2) |
2884                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2885                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887                            PIPE_CONFIG(ADDR_SURF_P2) |
2888                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2889                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891                            PIPE_CONFIG(ADDR_SURF_P2) |
2892                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2893                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895                            PIPE_CONFIG(ADDR_SURF_P2) |
2896                            TILE_SPLIT(split_equal_to_row_size));
2897                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898                            PIPE_CONFIG(ADDR_SURF_P2) |
2899                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2900                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2901                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902                            PIPE_CONFIG(ADDR_SURF_P2) |
2903                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2904                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                            PIPE_CONFIG(ADDR_SURF_P2) |
2907                            TILE_SPLIT(split_equal_to_row_size));
2908                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2909                            PIPE_CONFIG(ADDR_SURF_P2);
2910                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2911                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912                            PIPE_CONFIG(ADDR_SURF_P2));
2913                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2915                             PIPE_CONFIG(ADDR_SURF_P2) |
2916                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919                             PIPE_CONFIG(ADDR_SURF_P2) |
2920                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2922                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923                             PIPE_CONFIG(ADDR_SURF_P2) |
2924                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926                             PIPE_CONFIG(ADDR_SURF_P2) |
2927                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2928                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2930                             PIPE_CONFIG(ADDR_SURF_P2) |
2931                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2933                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934                             PIPE_CONFIG(ADDR_SURF_P2) |
2935                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2937                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938                             PIPE_CONFIG(ADDR_SURF_P2) |
2939                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2942                             PIPE_CONFIG(ADDR_SURF_P2));
2943                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2944                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2945                             PIPE_CONFIG(ADDR_SURF_P2) |
2946                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949                             PIPE_CONFIG(ADDR_SURF_P2) |
2950                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2952                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953                             PIPE_CONFIG(ADDR_SURF_P2) |
2954                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955
2956                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2957                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959                                 NUM_BANKS(ADDR_SURF_16_BANK));
2960                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2962                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963                                 NUM_BANKS(ADDR_SURF_16_BANK));
2964                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967                                 NUM_BANKS(ADDR_SURF_16_BANK));
2968                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2970                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971                                 NUM_BANKS(ADDR_SURF_16_BANK));
2972                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975                                 NUM_BANKS(ADDR_SURF_16_BANK));
2976                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979                                 NUM_BANKS(ADDR_SURF_16_BANK));
2980                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2983                                 NUM_BANKS(ADDR_SURF_8_BANK));
2984                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2985                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2986                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2987                                 NUM_BANKS(ADDR_SURF_16_BANK));
2988                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2990                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991                                 NUM_BANKS(ADDR_SURF_16_BANK));
2992                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2993                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995                                 NUM_BANKS(ADDR_SURF_16_BANK));
2996                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2998                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999                                 NUM_BANKS(ADDR_SURF_16_BANK));
3000                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003                                 NUM_BANKS(ADDR_SURF_16_BANK));
3004                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3006                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007                                 NUM_BANKS(ADDR_SURF_16_BANK));
3008                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011                                 NUM_BANKS(ADDR_SURF_8_BANK));
3012
3013                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3014                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3015                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3016                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3017                 break;
3018
3019         default:
3020                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3021         }
3022 }
3023
3024 /**
3025  * cik_select_se_sh - select which SE, SH to address
3026  *
3027  * @rdev: radeon_device pointer
3028  * @se_num: shader engine to address
3029  * @sh_num: sh block to address
3030  *
3031  * Select which SE, SH combinations to address. Certain
3032  * registers are instanced per SE or SH.  0xffffffff means
3033  * broadcast to all SEs or SHs (CIK).
3034  */
3035 static void cik_select_se_sh(struct radeon_device *rdev,
3036                              u32 se_num, u32 sh_num)
3037 {
3038         u32 data = INSTANCE_BROADCAST_WRITES;
3039
3040         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3041                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3042         else if (se_num == 0xffffffff)
3043                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3044         else if (sh_num == 0xffffffff)
3045                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3046         else
3047                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3048         WREG32(GRBM_GFX_INDEX, data);
3049 }
3050
3051 /**
3052  * cik_create_bitmask - create a bitmask
3053  *
3054  * @bit_width: length of the mask
3055  *
3056  * create a variable length bit mask (CIK).
3057  * Returns the bitmask.
3058  */
3059 static u32 cik_create_bitmask(u32 bit_width)
3060 {
3061         u32 i, mask = 0;
3062
3063         for (i = 0; i < bit_width; i++) {
3064                 mask <<= 1;
3065                 mask |= 1;
3066         }
3067         return mask;
3068 }
3069
3070 /**
3071  * cik_get_rb_disabled - computes the mask of disabled RBs
3072  *
3073  * @rdev: radeon_device pointer
3074  * @max_rb_num: max RBs (render backends) for the asic
3075  * @se_num: number of SEs (shader engines) for the asic
3076  * @sh_per_se: number of SH blocks per SE for the asic
3077  *
3078  * Calculates the bitmask of disabled RBs (CIK).
3079  * Returns the disabled RB bitmask.
3080  */
3081 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3082                               u32 max_rb_num_per_se,
3083                               u32 sh_per_se)
3084 {
3085         u32 data, mask;
3086
3087         data = RREG32(CC_RB_BACKEND_DISABLE);
3088         if (data & 1)
3089                 data &= BACKEND_DISABLE_MASK;
3090         else
3091                 data = 0;
3092         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3093
3094         data >>= BACKEND_DISABLE_SHIFT;
3095
3096         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3097
3098         return data & mask;
3099 }
3100
3101 /**
3102  * cik_setup_rb - setup the RBs on the asic
3103  *
3104  * @rdev: radeon_device pointer
3105  * @se_num: number of SEs (shader engines) for the asic
3106  * @sh_per_se: number of SH blocks per SE for the asic
3107  * @max_rb_num: max RBs (render backends) for the asic
3108  *
3109  * Configures per-SE/SH RB registers (CIK).
3110  */
3111 static void cik_setup_rb(struct radeon_device *rdev,
3112                          u32 se_num, u32 sh_per_se,
3113                          u32 max_rb_num_per_se)
3114 {
3115         int i, j;
3116         u32 data, mask;
3117         u32 disabled_rbs = 0;
3118         u32 enabled_rbs = 0;
3119
3120         for (i = 0; i < se_num; i++) {
3121                 for (j = 0; j < sh_per_se; j++) {
3122                         cik_select_se_sh(rdev, i, j);
3123                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3124                         if (rdev->family == CHIP_HAWAII)
3125                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3126                         else
3127                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3128                 }
3129         }
3130         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3131
3132         mask = 1;
3133         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3134                 if (!(disabled_rbs & mask))
3135                         enabled_rbs |= mask;
3136                 mask <<= 1;
3137         }
3138
3139         rdev->config.cik.backend_enable_mask = enabled_rbs;
3140
3141         for (i = 0; i < se_num; i++) {
3142                 cik_select_se_sh(rdev, i, 0xffffffff);
3143                 data = 0;
3144                 for (j = 0; j < sh_per_se; j++) {
3145                         switch (enabled_rbs & 3) {
3146                         case 0:
3147                                 if (j == 0)
3148                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3149                                 else
3150                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3151                                 break;
3152                         case 1:
3153                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3154                                 break;
3155                         case 2:
3156                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3157                                 break;
3158                         case 3:
3159                         default:
3160                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3161                                 break;
3162                         }
3163                         enabled_rbs >>= 2;
3164                 }
3165                 WREG32(PA_SC_RASTER_CONFIG, data);
3166         }
3167         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3168 }
3169
3170 /**
3171  * cik_gpu_init - setup the 3D engine
3172  *
3173  * @rdev: radeon_device pointer
3174  *
3175  * Configures the 3D engine and tiling configuration
3176  * registers so that the 3D engine is usable.
3177  */
3178 static void cik_gpu_init(struct radeon_device *rdev)
3179 {
3180         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3181         u32 mc_shared_chmap, mc_arb_ramcfg;
3182         u32 hdp_host_path_cntl;
3183         u32 tmp;
3184         int i, j;
3185
3186         switch (rdev->family) {
3187         case CHIP_BONAIRE:
3188                 rdev->config.cik.max_shader_engines = 2;
3189                 rdev->config.cik.max_tile_pipes = 4;
3190                 rdev->config.cik.max_cu_per_sh = 7;
3191                 rdev->config.cik.max_sh_per_se = 1;
3192                 rdev->config.cik.max_backends_per_se = 2;
3193                 rdev->config.cik.max_texture_channel_caches = 4;
3194                 rdev->config.cik.max_gprs = 256;
3195                 rdev->config.cik.max_gs_threads = 32;
3196                 rdev->config.cik.max_hw_contexts = 8;
3197
3198                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3199                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3200                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3201                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3202                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3203                 break;
3204         case CHIP_HAWAII:
3205                 rdev->config.cik.max_shader_engines = 4;
3206                 rdev->config.cik.max_tile_pipes = 16;
3207                 rdev->config.cik.max_cu_per_sh = 11;
3208                 rdev->config.cik.max_sh_per_se = 1;
3209                 rdev->config.cik.max_backends_per_se = 4;
3210                 rdev->config.cik.max_texture_channel_caches = 16;
3211                 rdev->config.cik.max_gprs = 256;
3212                 rdev->config.cik.max_gs_threads = 32;
3213                 rdev->config.cik.max_hw_contexts = 8;
3214
3215                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3216                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3217                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3218                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3219                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3220                 break;
3221         case CHIP_KAVERI:
3222                 rdev->config.cik.max_shader_engines = 1;
3223                 rdev->config.cik.max_tile_pipes = 4;
3224                 rdev->config.cik.max_cu_per_sh = 8;
3225                 rdev->config.cik.max_backends_per_se = 2;
3226                 rdev->config.cik.max_sh_per_se = 1;
3227                 rdev->config.cik.max_texture_channel_caches = 4;
3228                 rdev->config.cik.max_gprs = 256;
3229                 rdev->config.cik.max_gs_threads = 16;
3230                 rdev->config.cik.max_hw_contexts = 8;
3231
3232                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3233                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3234                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3235                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3236                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3237                 break;
3238         case CHIP_KABINI:
3239         case CHIP_MULLINS:
3240         default:
3241                 rdev->config.cik.max_shader_engines = 1;
3242                 rdev->config.cik.max_tile_pipes = 2;
3243                 rdev->config.cik.max_cu_per_sh = 2;
3244                 rdev->config.cik.max_sh_per_se = 1;
3245                 rdev->config.cik.max_backends_per_se = 1;
3246                 rdev->config.cik.max_texture_channel_caches = 2;
3247                 rdev->config.cik.max_gprs = 256;
3248                 rdev->config.cik.max_gs_threads = 16;
3249                 rdev->config.cik.max_hw_contexts = 8;
3250
3251                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3252                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3253                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3254                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3255                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3256                 break;
3257         }
3258
3259         /* Initialize HDP */
3260         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3261                 WREG32((0x2c14 + j), 0x00000000);
3262                 WREG32((0x2c18 + j), 0x00000000);
3263                 WREG32((0x2c1c + j), 0x00000000);
3264                 WREG32((0x2c20 + j), 0x00000000);
3265                 WREG32((0x2c24 + j), 0x00000000);
3266         }
3267
3268         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3269         WREG32(SRBM_INT_CNTL, 0x1);
3270         WREG32(SRBM_INT_ACK, 0x1);
3271
3272         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3273
3274         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3275         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3276
3277         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3278         rdev->config.cik.mem_max_burst_length_bytes = 256;
3279         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3280         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3281         if (rdev->config.cik.mem_row_size_in_kb > 4)
3282                 rdev->config.cik.mem_row_size_in_kb = 4;
3283         /* XXX use MC settings? */
3284         rdev->config.cik.shader_engine_tile_size = 32;
3285         rdev->config.cik.num_gpus = 1;
3286         rdev->config.cik.multi_gpu_tile_size = 64;
3287
3288         /* fix up row size */
3289         gb_addr_config &= ~ROW_SIZE_MASK;
3290         switch (rdev->config.cik.mem_row_size_in_kb) {
3291         case 1:
3292         default:
3293                 gb_addr_config |= ROW_SIZE(0);
3294                 break;
3295         case 2:
3296                 gb_addr_config |= ROW_SIZE(1);
3297                 break;
3298         case 4:
3299                 gb_addr_config |= ROW_SIZE(2);
3300                 break;
3301         }
3302
3303         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3304          * not have bank info, so create a custom tiling dword.
3305          * bits 3:0   num_pipes
3306          * bits 7:4   num_banks
3307          * bits 11:8  group_size
3308          * bits 15:12 row_size
3309          */
3310         rdev->config.cik.tile_config = 0;
3311         switch (rdev->config.cik.num_tile_pipes) {
3312         case 1:
3313                 rdev->config.cik.tile_config |= (0 << 0);
3314                 break;
3315         case 2:
3316                 rdev->config.cik.tile_config |= (1 << 0);
3317                 break;
3318         case 4:
3319                 rdev->config.cik.tile_config |= (2 << 0);
3320                 break;
3321         case 8:
3322         default:
3323                 /* XXX what about 12? */
3324                 rdev->config.cik.tile_config |= (3 << 0);
3325                 break;
3326         }
3327         rdev->config.cik.tile_config |=
3328                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3329         rdev->config.cik.tile_config |=
3330                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3331         rdev->config.cik.tile_config |=
3332                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3333
3334         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3335         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3336         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3337         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3338         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3339         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3340         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3341         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3342
3343         cik_tiling_mode_table_init(rdev);
3344
3345         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3346                      rdev->config.cik.max_sh_per_se,
3347                      rdev->config.cik.max_backends_per_se);
3348
3349         rdev->config.cik.active_cus = 0;
3350         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3351                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3352                         rdev->config.cik.active_cus +=
3353                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3354                 }
3355         }
3356
3357         /* set HW defaults for 3D engine */
3358         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3359
3360         WREG32(SX_DEBUG_1, 0x20);
3361
3362         WREG32(TA_CNTL_AUX, 0x00010000);
3363
3364         tmp = RREG32(SPI_CONFIG_CNTL);
3365         tmp |= 0x03000000;
3366         WREG32(SPI_CONFIG_CNTL, tmp);
3367
3368         WREG32(SQ_CONFIG, 1);
3369
3370         WREG32(DB_DEBUG, 0);
3371
3372         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3373         tmp |= 0x00000400;
3374         WREG32(DB_DEBUG2, tmp);
3375
3376         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3377         tmp |= 0x00020200;
3378         WREG32(DB_DEBUG3, tmp);
3379
3380         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3381         tmp |= 0x00018208;
3382         WREG32(CB_HW_CONTROL, tmp);
3383
3384         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3385
3386         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3387                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3388                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3389                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3390
3391         WREG32(VGT_NUM_INSTANCES, 1);
3392
3393         WREG32(CP_PERFMON_CNTL, 0);
3394
3395         WREG32(SQ_CONFIG, 0);
3396
3397         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3398                                           FORCE_EOV_MAX_REZ_CNT(255)));
3399
3400         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3401                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3402
3403         WREG32(VGT_GS_VERTEX_REUSE, 16);
3404         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3405
3406         tmp = RREG32(HDP_MISC_CNTL);
3407         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3408         WREG32(HDP_MISC_CNTL, tmp);
3409
3410         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3411         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3412
3413         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3414         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3415
3416         udelay(50);
3417 }
3418
3419 /*
3420  * GPU scratch registers helpers function.
3421  */
3422 /**
3423  * cik_scratch_init - setup driver info for CP scratch regs
3424  *
3425  * @rdev: radeon_device pointer
3426  *
3427  * Set up the number and offset of the CP scratch registers.
3428  * NOTE: use of CP scratch registers is a legacy inferface and
3429  * is not used by default on newer asics (r6xx+).  On newer asics,
3430  * memory buffers are used for fences rather than scratch regs.
3431  */
3432 static void cik_scratch_init(struct radeon_device *rdev)
3433 {
3434         int i;
3435
3436         rdev->scratch.num_reg = 7;
3437         rdev->scratch.reg_base = SCRATCH_REG0;
3438         for (i = 0; i < rdev->scratch.num_reg; i++) {
3439                 rdev->scratch.free[i] = true;
3440                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3441         }
3442 }
3443
3444 /**
3445  * cik_ring_test - basic gfx ring test
3446  *
3447  * @rdev: radeon_device pointer
3448  * @ring: radeon_ring structure holding ring information
3449  *
3450  * Allocate a scratch register and write to it using the gfx ring (CIK).
3451  * Provides a basic gfx ring test to verify that the ring is working.
3452  * Used by cik_cp_gfx_resume();
3453  * Returns 0 on success, error on failure.
3454  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* Grab a free CP scratch register to use as the test target. */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed the register with a sentinel, then have the CP overwrite it. */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Emit a 3-dword SET_UCONFIG_REG packet writing 0xDEADBEEF
	 * to the scratch register.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* Poll the scratch register until the CP has processed the packet
	 * or the timeout expires.
	 */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3495
3496 /**
3497  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3498  *
3499  * @rdev: radeon_device pointer
3500  * @ridx: radeon ring index
3501  *
3502  * Emits an hdp flush on the cp.
3503  */
3504 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3505                                        int ridx)
3506 {
3507         struct radeon_ring *ring = &rdev->ring[ridx];
3508         u32 ref_and_mask;
3509
3510         switch (ring->idx) {
3511         case CAYMAN_RING_TYPE_CP1_INDEX:
3512         case CAYMAN_RING_TYPE_CP2_INDEX:
3513         default:
3514                 switch (ring->me) {
3515                 case 0:
3516                         ref_and_mask = CP2 << ring->pipe;
3517                         break;
3518                 case 1:
3519                         ref_and_mask = CP6 << ring->pipe;
3520                         break;
3521                 default:
3522                         return;
3523                 }
3524                 break;
3525         case RADEON_RING_TYPE_GFX_INDEX:
3526                 ref_and_mask = CP0;
3527                 break;
3528         }
3529
3530         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3531         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3532                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3533                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3534         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3535         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3536         radeon_ring_write(ring, ref_and_mask);
3537         radeon_ring_write(ring, ref_and_mask);
3538         radeon_ring_write(ring, 0x20); /* poll interval */
3539 }
3540
3541 /**
3542  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3543  *
3544  * @rdev: radeon_device pointer
3545  * @fence: radeon fence object
3546  *
 * Emits a fence sequence number on the gfx ring and flushes
3548  * GPU caches.
3549  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1) = write 32-bit seq, INT_SEL(0) = no interrupt
	 * for the dummy event */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(2) = write the real seq and raise an interrupt when it lands */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3581
3582 /**
3583  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3584  *
3585  * @rdev: radeon_device pointer
3586  * @fence: radeon fence object
3587  *
 * Emits a fence sequence number on the compute ring and flushes
3589  * GPU caches.
3590  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1) = write 32-bit seq, INT_SEL(2) = also raise an interrupt */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3609
3610 /**
3611  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3612  *
3613  * @rdev: radeon_device pointer
3614  * @ring: radeon ring buffer object
3615  * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
3617  *
3618  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3619  * from running ahead of semaphore waits.
3620  */
3621 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3622                              struct radeon_ring *ring,
3623                              struct radeon_semaphore *semaphore,
3624                              bool emit_wait)
3625 {
3626         uint64_t addr = semaphore->gpu_addr;
3627         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3628
3629         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3630         radeon_ring_write(ring, lower_32_bits(addr));
3631         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3632
3633         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3634                 /* Prevent the PFP from running ahead of the semaphore wait */
3635                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3636                 radeon_ring_write(ring, 0x0);
3637         }
3638
3639         return true;
3640 }
3641
3642 /**
3643  * cik_copy_cpdma - copy pages using the CP DMA engine
3644  *
3645  * @rdev: radeon_device pointer
3646  * @src_offset: src GPU address
3647  * @dst_offset: dst GPU address
3648  * @num_gpu_pages: number of GPU pages to xfer
3649  * @resv: reservation object to sync to
3650  *
 * Copy GPU pages using the CP DMA engine (CIK+).
3652  * Used by the radeon ttm implementation to move pages if
3653  * registered as the asic copy callback.
3654  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct dma_resv *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	/* each DMA_DATA packet can move at most 0x1fffff bytes */
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet plus headroom for sync/fence packets */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait on the fences in @resv before starting the copy */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the final chunk needs to synchronize with the CP */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	/* fence the copy so callers can wait for completion */
	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
3713
3714 /*
3715  * IB stuff
3716  */
3717 /**
3718  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3719  *
3720  * @rdev: radeon_device pointer
3721  * @ib: radeon indirect buffer object
3722  *
3723  * Emits a DE (drawing engine) or CE (constant engine) IB
3724  * on the gfx ring.  IBs are usually generated by userspace
3725  * acceleration drivers and submitted to the kernel for
3726  * scheduling on the ring.  This function schedules the IB
3727  * on the gfx ring for execution by the GPU.
3728  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* record the rptr expected after this IB is consumed:
			 * 3 dwords for this SET_UCONFIG_REG packet + 4 for the
			 * IB packet emitted below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* same idea but written to the writeback slot in memory:
			 * 5 dwords for WRITE_DATA + 4 for the IB packet */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VM id to execute under */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3768
3769 /**
3770  * cik_ib_test - basic gfx ring IB test
3771  *
3772  * @rdev: radeon_device pointer
3773  * @ring: radeon_ring structure holding ring information
3774  *
3775  * Allocate an IB and execute it on the gfx ring (CIK).
3776  * Provides a basic gfx ring test to verify that IBs are working.
3777  * Returns 0 on success, error on failure.
3778  */
3779 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3780 {
3781         struct radeon_ib ib;
3782         uint32_t scratch;
3783         uint32_t tmp = 0;
3784         unsigned i;
3785         int r;
3786
3787         r = radeon_scratch_get(rdev, &scratch);
3788         if (r) {
3789                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3790                 return r;
3791         }
3792         WREG32(scratch, 0xCAFEDEAD);
3793         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3794         if (r) {
3795                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3796                 radeon_scratch_free(rdev, scratch);
3797                 return r;
3798         }
3799         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3800         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3801         ib.ptr[2] = 0xDEADBEEF;
3802         ib.length_dw = 3;
3803         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3804         if (r) {
3805                 radeon_scratch_free(rdev, scratch);
3806                 radeon_ib_free(rdev, &ib);
3807                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3808                 return r;
3809         }
3810         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3811                 RADEON_USEC_IB_TEST_TIMEOUT));
3812         if (r < 0) {
3813                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3814                 radeon_scratch_free(rdev, scratch);
3815                 radeon_ib_free(rdev, &ib);
3816                 return r;
3817         } else if (r == 0) {
3818                 DRM_ERROR("radeon: fence wait timed out.\n");
3819                 radeon_scratch_free(rdev, scratch);
3820                 radeon_ib_free(rdev, &ib);
3821                 return -ETIMEDOUT;
3822         }
3823         r = 0;
3824         for (i = 0; i < rdev->usec_timeout; i++) {
3825                 tmp = RREG32(scratch);
3826                 if (tmp == 0xDEADBEEF)
3827                         break;
3828                 udelay(1);
3829         }
3830         if (i < rdev->usec_timeout) {
3831                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3832         } else {
3833                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3834                           scratch, tmp);
3835                 r = -EINVAL;
3836         }
3837         radeon_scratch_free(rdev, scratch);
3838         radeon_ib_free(rdev, &ib);
3839         return r;
3840 }
3841
3842 /*
3843  * CP.
 * On CIK, gfx and compute now have independent command processors.
3845  *
3846  * GFX
3847  * Gfx consists of a single ring and can process both gfx jobs and
3848  * compute jobs.  The gfx CP consists of three microengines (ME):
3849  * PFP - Pre-Fetch Parser
3850  * ME - Micro Engine
3851  * CE - Constant Engine
3852  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3854  * used by the DE so that they can be loaded into cache in parallel
3855  * while the DE is processing state update packets.
3856  *
3857  * Compute
3858  * The compute CP consists of two microengines (ME):
3859  * MEC1 - Compute MicroEngine 1
3860  * MEC2 - Compute MicroEngine 2
3861  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3862  * The queues are exposed to userspace and are programmed directly
3863  * by the compute runtime.
3864  */
3865 /**
3866  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3867  *
3868  * @rdev: radeon_device pointer
3869  * @enable: enable or disable the MEs
3870  *
3871  * Halts or unhalts the gfx MEs.
3872  */
3873 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3874 {
3875         if (enable)
3876                 WREG32(CP_ME_CNTL, 0);
3877         else {
3878                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3879                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3880                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3881                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3882         }
3883         udelay(50);
3884 }
3885
3886 /**
3887  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3888  *
3889  * @rdev: radeon_device pointer
3890  *
3891  * Loads the gfx PFP, ME, and CE ucode.
3892  * Returns 0 for success, -EINVAL if the ucode is not available.
3893  */
3894 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3895 {
3896         int i;
3897
3898         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3899                 return -EINVAL;
3900
3901         cik_cp_gfx_enable(rdev, false);
3902
3903         if (rdev->new_fw) {
3904                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3905                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3906                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3907                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3908                 const struct gfx_firmware_header_v1_0 *me_hdr =
3909                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3910                 const __le32 *fw_data;
3911                 u32 fw_size;
3912
3913                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3914                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3915                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3916
3917                 /* PFP */
3918                 fw_data = (const __le32 *)
3919                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3920                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3921                 WREG32(CP_PFP_UCODE_ADDR, 0);
3922                 for (i = 0; i < fw_size; i++)
3923                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3924                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3925
3926                 /* CE */
3927                 fw_data = (const __le32 *)
3928                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3929                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3930                 WREG32(CP_CE_UCODE_ADDR, 0);
3931                 for (i = 0; i < fw_size; i++)
3932                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3933                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3934
3935                 /* ME */
3936                 fw_data = (const __be32 *)
3937                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3938                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3939                 WREG32(CP_ME_RAM_WADDR, 0);
3940                 for (i = 0; i < fw_size; i++)
3941                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3942                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3943                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3944         } else {
3945                 const __be32 *fw_data;
3946
3947                 /* PFP */
3948                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3949                 WREG32(CP_PFP_UCODE_ADDR, 0);
3950                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3951                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3952                 WREG32(CP_PFP_UCODE_ADDR, 0);
3953
3954                 /* CE */
3955                 fw_data = (const __be32 *)rdev->ce_fw->data;
3956                 WREG32(CP_CE_UCODE_ADDR, 0);
3957                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3958                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3959                 WREG32(CP_CE_UCODE_ADDR, 0);
3960
3961                 /* ME */
3962                 fw_data = (const __be32 *)rdev->me_fw->data;
3963                 WREG32(CP_ME_RAM_WADDR, 0);
3964                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3965                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3966                 WREG32(CP_ME_RAM_WADDR, 0);
3967         }
3968
3969         return 0;
3970 }
3971
3972 /**
3973  * cik_cp_gfx_start - start the gfx ring
3974  *
3975  * @rdev: radeon_device pointer
3976  *
3977  * Enables the ring and loads the clear state context and other
3978  * packets required to init the ring.
3979  * Returns 0 for success, error for failure.
3980  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* default state buffer plus 17 dwords of fixed packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the generated default state buffer */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	/* two context registers starting at offset 0x316 */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4032
4033 /**
4034  * cik_cp_gfx_fini - stop the gfx ring
4035  *
4036  * @rdev: radeon_device pointer
4037  *
4038  * Stop the gfx ring and tear down the driver ring
4039  * info.
4040  */
4041 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4042 {
4043         cik_cp_gfx_enable(rdev, false);
4044         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4045 }
4046
4047 /**
4048  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4049  *
4050  * @rdev: radeon_device pointer
4051  *
4052  * Program the location and size of the gfx ring buffer
4053  * and test it to make sure it's working.
4054  * Returns 0 for success, error for failure.
4055  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	/* latch the final CNTL value (this also clears RB_RPTR_WR_ENA) */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx ring is back; restore the full active VRAM size */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4122
4123 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4124                      struct radeon_ring *ring)
4125 {
4126         u32 rptr;
4127
4128         if (rdev->wb.enabled)
4129                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4130         else
4131                 rptr = RREG32(CP_RB0_RPTR);
4132
4133         return rptr;
4134 }
4135
4136 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4137                      struct radeon_ring *ring)
4138 {
4139         return RREG32(CP_RB0_WPTR);
4140 }
4141
4142 void cik_gfx_set_wptr(struct radeon_device *rdev,
4143                       struct radeon_ring *ring)
4144 {
4145         WREG32(CP_RB0_WPTR, ring->wptr);
4146         (void)RREG32(CP_RB0_WPTR);
4147 }
4148
4149 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4150                          struct radeon_ring *ring)
4151 {
4152         u32 rptr;
4153
4154         if (rdev->wb.enabled) {
4155                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4156         } else {
4157                 mutex_lock(&rdev->srbm_mutex);
4158                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4159                 rptr = RREG32(CP_HQD_PQ_RPTR);
4160                 cik_srbm_select(rdev, 0, 0, 0, 0);
4161                 mutex_unlock(&rdev->srbm_mutex);
4162         }
4163
4164         return rptr;
4165 }
4166
4167 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4168                          struct radeon_ring *ring)
4169 {
4170         u32 wptr;
4171
4172         if (rdev->wb.enabled) {
4173                 /* XXX check if swapping is necessary on BE */
4174                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4175         } else {
4176                 mutex_lock(&rdev->srbm_mutex);
4177                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4178                 wptr = RREG32(CP_HQD_PQ_WPTR);
4179                 cik_srbm_select(rdev, 0, 0, 0, 0);
4180                 mutex_unlock(&rdev->srbm_mutex);
4181         }
4182
4183         return wptr;
4184 }
4185
4186 void cik_compute_set_wptr(struct radeon_device *rdev,
4187                           struct radeon_ring *ring)
4188 {
4189         /* XXX check if swapping is necessary on BE */
4190         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4191         WDOORBELL32(ring->doorbell_index, ring->wptr);
4192 }
4193
/* Quiesce one compute queue's HQD.
 * Called with rdev->srbm_mutex held (see cik_cp_compute_enable()).
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	/* address this queue's registers via SRBM */
	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		/* wait for the queue to go inactive before clearing its pointers */
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4218
4219 /**
4220  * cik_cp_compute_enable - enable/disable the compute CP MEs
4221  *
4222  * @rdev: radeon_device pointer
4223  * @enable: enable or disable the MEs
4224  *
4225  * Halts or unhalts the compute MEs.
4226  */
4227 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4228 {
4229         if (enable)
4230                 WREG32(CP_MEC_CNTL, 0);
4231         else {
4232                 /*
4233                  * To make hibernation reliable we need to clear compute ring
4234                  * configuration before halting the compute ring.
4235                  */
4236                 mutex_lock(&rdev->srbm_mutex);
4237                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4238                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4239                 mutex_unlock(&rdev->srbm_mutex);
4240
4241                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4242                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4243                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4244         }
4245         udelay(50);
4246 }
4247
4248 /**
4249  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4250  *
4251  * @rdev: radeon_device pointer
4252  *
4253  * Loads the compute MEC1&2 ucode.
4254  * Returns 0 for success, -EINVAL if the ucode is not available.
4255  */
4256 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4257 {
4258         int i;
4259
4260         if (!rdev->mec_fw)
4261                 return -EINVAL;
4262
4263         cik_cp_compute_enable(rdev, false);
4264
4265         if (rdev->new_fw) {
4266                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4267                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4268                 const __le32 *fw_data;
4269                 u32 fw_size;
4270
4271                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4272
4273                 /* MEC1 */
4274                 fw_data = (const __le32 *)
4275                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4276                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4277                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4278                 for (i = 0; i < fw_size; i++)
4279                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4280                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4281
4282                 /* MEC2 */
4283                 if (rdev->family == CHIP_KAVERI) {
4284                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4285                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4286
4287                         fw_data = (const __le32 *)
4288                                 (rdev->mec2_fw->data +
4289                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4290                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4291                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4292                         for (i = 0; i < fw_size; i++)
4293                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4294                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4295                 }
4296         } else {
4297                 const __be32 *fw_data;
4298
4299                 /* MEC1 */
4300                 fw_data = (const __be32 *)rdev->mec_fw->data;
4301                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4302                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4303                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4304                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4305
4306                 if (rdev->family == CHIP_KAVERI) {
4307                         /* MEC2 */
4308                         fw_data = (const __be32 *)rdev->mec_fw->data;
4309                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4310                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4311                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4312                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4313                 }
4314         }
4315
4316         return 0;
4317 }
4318
4319 /**
4320  * cik_cp_compute_start - start the compute queues
4321  *
4322  * @rdev: radeon_device pointer
4323  *
4324  * Enable the compute queues.
4325  * Returns 0 for success, error for failure.
4326  */
4327 static int cik_cp_compute_start(struct radeon_device *rdev)
4328 {
4329         cik_cp_compute_enable(rdev, true);
4330
4331         return 0;
4332 }
4333
4334 /**
4335  * cik_cp_compute_fini - stop the compute queues
4336  *
4337  * @rdev: radeon_device pointer
4338  *
4339  * Stop the compute queues and tear down the driver queue
4340  * info.
4341  */
4342 static void cik_cp_compute_fini(struct radeon_device *rdev)
4343 {
4344         int i, idx, r;
4345
4346         cik_cp_compute_enable(rdev, false);
4347
4348         for (i = 0; i < 2; i++) {
4349                 if (i == 0)
4350                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4351                 else
4352                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4353
4354                 if (rdev->ring[idx].mqd_obj) {
4355                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4356                         if (unlikely(r != 0))
4357                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4358
4359                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4360                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4361
4362                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4363                         rdev->ring[idx].mqd_obj = NULL;
4364                 }
4365         }
4366 }
4367
4368 static void cik_mec_fini(struct radeon_device *rdev)
4369 {
4370         int r;
4371
4372         if (rdev->mec.hpd_eop_obj) {
4373                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4374                 if (unlikely(r != 0))
4375                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4376                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4377                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4378
4379                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4380                 rdev->mec.hpd_eop_obj = NULL;
4381         }
4382 }
4383
4384 #define MEC_HPD_SIZE 2048
4385
4386 static int cik_mec_init(struct radeon_device *rdev)
4387 {
4388         int r;
4389         u32 *hpd;
4390
4391         /*
4392          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4393          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4394          */
4395         if (rdev->family == CHIP_KAVERI)
4396                 rdev->mec.num_mec = 2;
4397         else
4398                 rdev->mec.num_mec = 1;
4399         rdev->mec.num_pipe = 4;
4400         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4401
4402         if (rdev->mec.hpd_eop_obj == NULL) {
4403                 r = radeon_bo_create(rdev,
4404                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4405                                      PAGE_SIZE, true,
4406                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4407                                      &rdev->mec.hpd_eop_obj);
4408                 if (r) {
4409                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4410                         return r;
4411                 }
4412         }
4413
4414         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4415         if (unlikely(r != 0)) {
4416                 cik_mec_fini(rdev);
4417                 return r;
4418         }
4419         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4420                           &rdev->mec.hpd_eop_gpu_addr);
4421         if (r) {
4422                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4423                 cik_mec_fini(rdev);
4424                 return r;
4425         }
4426         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4427         if (r) {
4428                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4429                 cik_mec_fini(rdev);
4430                 return r;
4431         }
4432
4433         /* clear memory.  Not sure if this is required or not */
4434         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4435
4436         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4437         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4438
4439         return 0;
4440 }
4441
/*
 * CPU-side shadow of the per-queue CP_HQD_* / CP_MQD_* register block,
 * embedded in struct bonaire_mqd which is written to GPU-visible memory
 * in cik_cp_compute_resume().
 * NOTE(review): the field order appears to mirror the hardware register
 * layout consumed by the CP - do not reorder or repack.
 */
struct hqd_registers
{
        u32 cp_mqd_base_addr;
        u32 cp_mqd_base_addr_hi;
        u32 cp_hqd_active;
        u32 cp_hqd_vmid;
        u32 cp_hqd_persistent_state;
        u32 cp_hqd_pipe_priority;
        u32 cp_hqd_queue_priority;
        u32 cp_hqd_quantum;
        u32 cp_hqd_pq_base;
        u32 cp_hqd_pq_base_hi;
        u32 cp_hqd_pq_rptr;
        u32 cp_hqd_pq_rptr_report_addr;
        u32 cp_hqd_pq_rptr_report_addr_hi;
        u32 cp_hqd_pq_wptr_poll_addr;
        u32 cp_hqd_pq_wptr_poll_addr_hi;
        u32 cp_hqd_pq_doorbell_control;
        u32 cp_hqd_pq_wptr;
        u32 cp_hqd_pq_control;
        u32 cp_hqd_ib_base_addr;
        u32 cp_hqd_ib_base_addr_hi;
        u32 cp_hqd_ib_rptr;
        u32 cp_hqd_ib_control;
        u32 cp_hqd_iq_timer;
        u32 cp_hqd_iq_rptr;
        u32 cp_hqd_dequeue_request;
        u32 cp_hqd_dma_offload;
        u32 cp_hqd_sema_cmd;
        u32 cp_hqd_msg_type;
        u32 cp_hqd_atomic0_preop_lo;
        u32 cp_hqd_atomic0_preop_hi;
        u32 cp_hqd_atomic1_preop_lo;
        u32 cp_hqd_atomic1_preop_hi;
        u32 cp_hqd_hq_scheduler0;
        u32 cp_hqd_hq_scheduler1;
        u32 cp_mqd_control;
};
4480
/*
 * Memory Queue Descriptor (MQD) for CIK (Bonaire-class) compute queues.
 * One MQD per compute ring is allocated in GTT by cik_cp_compute_resume();
 * queue_state shadows the HQD registers programmed there.
 * NOTE(review): this layout is read by the CP from GPU memory - do not
 * reorder or resize fields.
 */
struct bonaire_mqd
{
        u32 header;
        u32 dispatch_initiator;
        u32 dimensions[3];
        u32 start_idx[3];
        u32 num_threads[3];
        u32 pipeline_stat_enable;
        u32 perf_counter_enable;
        u32 pgm[2];
        u32 tba[2];
        u32 tma[2];
        u32 pgm_rsrc[2];
        u32 vmid;
        u32 resource_limits;
        u32 static_thread_mgmt01[2];
        u32 tmp_ring_size;
        u32 static_thread_mgmt23[2];
        u32 restart[3];
        u32 thread_trace_enable;
        u32 reserved1;
        u32 user_data[16];
        u32 vgtcs_invoke_count[2];
        struct hqd_registers queue_state;
        u32 dequeue_cntr;
        u32 interrupt_queue[64];
};
4508
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
        int r, i, j, idx;
        u32 tmp;
        bool use_doorbell = true;
        u64 hqd_gpu_addr;
        u64 mqd_gpu_addr;
        u64 eop_gpu_addr;
        u64 wb_gpu_addr;
        u32 *buf;
        struct bonaire_mqd *mqd;

        /* un-halt the MECs first */
        r = cik_cp_compute_start(rdev);
        if (r)
                return r;

        /* fix up chicken bits */
        tmp = RREG32(CP_CPF_DEBUG);
        tmp |= (1 << 23);
        WREG32(CP_CPF_DEBUG, tmp);

        /* init the pipes */
        mutex_lock(&rdev->srbm_mutex);

        /* pipes 0-3 are on MEC1, pipes 4-7 (KV only) are on MEC2 */
        for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
                int me = (i < 4) ? 1 : 2;
                int pipe = (i < 4) ? i : (i - 4);

                cik_srbm_select(rdev, me, pipe, 0, 0);

                /* each pipe gets its own MEC_HPD_SIZE * 2 slice of the EOP bo */
                eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
                /* write the EOP addr */
                WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
                WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

                /* set the VMID assigned */
                WREG32(CP_HPD_EOP_VMID, 0);

                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
                tmp = RREG32(CP_HPD_EOP_CONTROL);
                tmp &= ~EOP_SIZE_MASK;
                tmp |= order_base_2(MEC_HPD_SIZE / 8);
                WREG32(CP_HPD_EOP_CONTROL, tmp);

        }
        cik_srbm_select(rdev, 0, 0, 0, 0);
        mutex_unlock(&rdev->srbm_mutex);

        /* init the queues.  Just two for now. */
        for (i = 0; i < 2; i++) {
                if (i == 0)
                        idx = CAYMAN_RING_TYPE_CP1_INDEX;
                else
                        idx = CAYMAN_RING_TYPE_CP2_INDEX;

                /* allocate the MQD bo lazily; reused across resumes */
                if (rdev->ring[idx].mqd_obj == NULL) {
                        r = radeon_bo_create(rdev,
                                             sizeof(struct bonaire_mqd),
                                             PAGE_SIZE, true,
                                             RADEON_GEM_DOMAIN_GTT, 0, NULL,
                                             NULL, &rdev->ring[idx].mqd_obj);
                        if (r) {
                                dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
                                return r;
                        }
                }

                r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
                if (unlikely(r != 0)) {
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
                                  &mqd_gpu_addr);
                if (r) {
                        dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
                if (r) {
                        dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }

                /* init the mqd struct */
                memset(buf, 0, sizeof(struct bonaire_mqd));

                mqd = (struct bonaire_mqd *)buf;
                mqd->header = 0xC0310800;
                /* enable all compute units for this queue */
                mqd->static_thread_mgmt01[0] = 0xffffffff;
                mqd->static_thread_mgmt01[1] = 0xffffffff;
                mqd->static_thread_mgmt23[0] = 0xffffffff;
                mqd->static_thread_mgmt23[1] = 0xffffffff;

                /* point the SRBM aperture at this ring's queue while we
                 * program the HQD registers below */
                mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, rdev->ring[idx].me,
                                rdev->ring[idx].pipe,
                                rdev->ring[idx].queue, 0);

                /* disable wptr polling */
                tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
                tmp &= ~WPTR_POLL_EN;
                WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

                /* enable doorbell? */
                mqd->queue_state.cp_hqd_pq_doorbell_control =
                        RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                if (use_doorbell)
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                else
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* disable the queue if it's active */
                mqd->queue_state.cp_hqd_dequeue_request = 0;
                mqd->queue_state.cp_hqd_pq_rptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr= 0;
                if (RREG32(CP_HQD_ACTIVE) & 1) {
                        WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                        /* bounded wait for the dequeue to complete */
                        for (j = 0; j < rdev->usec_timeout; j++) {
                                if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                        break;
                                udelay(1);
                        }
                        WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
                        WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
                        WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                }

                /* set the pointer to the MQD */
                mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
                WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
                WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
                /* set MQD vmid to 0 */
                mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
                mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
                WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

                /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
                hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
                mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
                mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
                WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
                WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

                /* set up the HQD, this is similar to CP_RB0_CNTL */
                mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

                /* queue size is log2 of ring_size in dwords */
                mqd->queue_state.cp_hqd_pq_control |=
                        order_base_2(rdev->ring[idx].ring_size / 8);
                mqd->queue_state.cp_hqd_pq_control |=
                        (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
                mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
                mqd->queue_state.cp_hqd_pq_control |=
                        PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
                WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

                /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

                /* set the wb address whether it's enabled or not */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
                        upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

                /* enable the doorbell if requested */
                if (use_doorbell) {
                        mqd->queue_state.cp_hqd_pq_doorbell_control =
                                RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
                        mqd->queue_state.cp_hqd_pq_doorbell_control |=
                                DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                        mqd->queue_state.cp_hqd_pq_doorbell_control &=
                                ~(DOORBELL_SOURCE | DOORBELL_HIT);

                } else {
                        mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
                }
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
                rdev->ring[idx].wptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
                WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

                /* set the vmid for the queue */
                mqd->queue_state.cp_hqd_vmid = 0;
                WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

                /* activate the queue */
                mqd->queue_state.cp_hqd_active = 1;
                WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);

                radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
                radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

                /* smoke-test the ring; mark it unusable on failure */
                rdev->ring[idx].ready = true;
                r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
                if (r)
                        rdev->ring[idx].ready = false;
        }

        return 0;
}
4754
4755 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4756 {
4757         cik_cp_gfx_enable(rdev, enable);
4758         cik_cp_compute_enable(rdev, enable);
4759 }
4760
/*
 * Load gfx then compute CP microcode.
 * Returns 0 on success, first error code otherwise.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
        int r;

        r = cik_cp_gfx_load_microcode(rdev);
        if (r == 0)
                r = cik_cp_compute_load_microcode(rdev);

        return r;
}
4774
/* Tear down the gfx CP state, then the compute queues. */
static void cik_cp_fini(struct radeon_device *rdev)
{
        cik_cp_gfx_fini(rdev);
        cik_cp_compute_fini(rdev);
}
4780
4781 static int cik_cp_resume(struct radeon_device *rdev)
4782 {
4783         int r;
4784
4785         cik_enable_gui_idle_interrupt(rdev, false);
4786
4787         r = cik_cp_load_microcode(rdev);
4788         if (r)
4789                 return r;
4790
4791         r = cik_cp_gfx_resume(rdev);
4792         if (r)
4793                 return r;
4794         r = cik_cp_compute_resume(rdev);
4795         if (r)
4796                 return r;
4797
4798         cik_enable_gui_idle_interrupt(rdev, true);
4799
4800         return 0;
4801 }
4802
/*
 * Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log.  Used by
 * cik_gpu_soft_reset() to aid hang diagnosis; purely informational.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
                RREG32(GRBM_STATUS));
        dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
                RREG32(GRBM_STATUS2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
                RREG32(GRBM_STATUS_SE0));
        dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
                RREG32(GRBM_STATUS_SE1));
        dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
                RREG32(GRBM_STATUS_SE2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
                RREG32(GRBM_STATUS_SE3));
        dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
                RREG32(SRBM_STATUS));
        dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
                RREG32(SRBM_STATUS2));
        dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
                RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
        dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
                 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
        dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
        dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT2));
        dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT3));
        dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
                 RREG32(CP_CPF_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPF_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
        dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPC_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4842
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
        u32 reset_mask = 0;
        u32 tmp;

        /* GRBM_STATUS */
        tmp = RREG32(GRBM_STATUS);
        /* any busy gfx pipeline stage implies a GFX reset */
        if (tmp & (PA_BUSY | SC_BUSY |
                   BCI_BUSY | SX_BUSY |
                   TA_BUSY | VGT_BUSY |
                   DB_BUSY | CB_BUSY |
                   GDS_BUSY | SPI_BUSY |
                   IA_BUSY | IA_BUSY_NO_DMA))
                reset_mask |= RADEON_RESET_GFX;

        if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
                reset_mask |= RADEON_RESET_CP;

        /* GRBM_STATUS2 */
        tmp = RREG32(GRBM_STATUS2);
        if (tmp & RLC_BUSY)
                reset_mask |= RADEON_RESET_RLC;

        /* SDMA0_STATUS_REG */
        tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
        if (!(tmp & SDMA_IDLE))
                reset_mask |= RADEON_RESET_DMA;

        /* SDMA1_STATUS_REG */
        tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
        if (!(tmp & SDMA_IDLE))
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS2 */
        tmp = RREG32(SRBM_STATUS2);
        if (tmp & SDMA_BUSY)
                reset_mask |= RADEON_RESET_DMA;

        if (tmp & SDMA1_BUSY)
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS */
        tmp = RREG32(SRBM_STATUS);

        if (tmp & IH_BUSY)
                reset_mask |= RADEON_RESET_IH;

        if (tmp & SEM_BUSY)
                reset_mask |= RADEON_RESET_SEM;

        if (tmp & GRBM_RQ_PENDING)
                reset_mask |= RADEON_RESET_GRBM;

        if (tmp & VMC_BUSY)
                reset_mask |= RADEON_RESET_VMC;

        if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
                   MCC_BUSY | MCD_BUSY))
                reset_mask |= RADEON_RESET_MC;

        if (evergreen_is_display_hung(rdev))
                reset_mask |= RADEON_RESET_DISPLAY;

        /* Skip MC reset as it's mostly likely not hung, just busy */
        if (reset_mask & RADEON_RESET_MC) {
                DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
                reset_mask &= ~RADEON_RESET_MC;
        }

        return reset_mask;
}
4923
4924 /**
4925  * cik_gpu_soft_reset - soft reset GPU
4926  *
4927  * @rdev: radeon_device pointer
4928  * @reset_mask: mask of which blocks to reset
4929  *
4930  * Soft reset the blocks specified in @reset_mask.
4931  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* nothing flagged as hung -> nothing to reset */
	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump status registers to aid post-mortem debugging */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt the SDMA engines (only the ones being reset) */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce the memory controller before pulling reset lines */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the hang mask into per-block GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC soft reset is only done on dGPUs, never on IGPs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* assert the reset bits, hold ~50us, then release them again;
	 * the readbacks after each write post the write to the bus */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5054
/* GMCON register state saved across a pci config reset on KV/KB (IGP) parts */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5060
/* Stash the GMCON registers so kv_restore_regs_for_reset() can put them
 * back after the pci config reset, and keep the RENG from kicking in
 * while the asic is being reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* disable RENG execution on power-up / register update during reset */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5072
/* Replay a fixed GMCON PGFSM command sequence after a pci config reset,
 * then restore the registers captured by kv_save_regs_for_reset().
 *
 * NOTE(review): each PGFSM_WRITE/PGFSM_CONFIG pair followed by five zero
 * writes appears to program one power-gating FSM; the exact semantics of
 * the magic values are not documented here -- treat this as an opaque,
 * order-sensitive hardware sequence.
 */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* restore the saved registers in reverse order of the save */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
5145
/* Full asic reset via the pci config space reset mechanism: halt every
 * engine, quiesce the MC, then pull the reset and wait for the asic to
 * come back.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* save GMCON state on IGPs so it can be restored after the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset;
	 * CONFIG_MEMSIZE reads all-ones while the asic is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5208
5209 /**
5210  * cik_asic_reset - soft reset GPU
5211  *
5212  * @rdev: radeon_device pointer
5213  * @hard: force hard reset
5214  *
5215  * Look up which blocks are hung and attempt
5216  * to reset them.
5217  * Returns 0 for success.
5218  */
5219 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5220 {
5221         u32 reset_mask;
5222
5223         if (hard) {
5224                 cik_gpu_pci_config_reset(rdev);
5225                 return 0;
5226         }
5227
5228         reset_mask = cik_gpu_check_soft_reset(rdev);
5229
5230         if (reset_mask)
5231                 r600_set_bios_scratch_engine_hung(rdev, true);
5232
5233         /* try soft reset */
5234         cik_gpu_soft_reset(rdev, reset_mask);
5235
5236         reset_mask = cik_gpu_check_soft_reset(rdev);
5237
5238         /* try pci config reset */
5239         if (reset_mask && radeon_hard_reset)
5240                 cik_gpu_pci_config_reset(rdev);
5241
5242         reset_mask = cik_gpu_check_soft_reset(rdev);
5243
5244         if (!reset_mask)
5245                 r600_set_bios_scratch_engine_hung(rdev, false);
5246
5247         return 0;
5248 }
5249
5250 /**
5251  * cik_gfx_is_lockup - check if the 3D engine is locked up
5252  *
5253  * @rdev: radeon_device pointer
5254  * @ring: radeon_ring structure holding ring information
5255  *
5256  * Check if the 3D engine is locked up (CIK).
5257  * Returns true if the engine is locked, false if not.
5258  */
5259 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5260 {
5261         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5262
5263         if (!(reset_mask & (RADEON_RESET_GFX |
5264                             RADEON_RESET_COMPUTE |
5265                             RADEON_RESET_CP))) {
5266                 radeon_ring_lockup_update(rdev, ring);
5267                 return false;
5268         }
5269         return radeon_ring_test_lockup(rdev, ring);
5270 }
5271
5272 /* MC */
5273 /**
5274  * cik_mc_program - program the GPU memory controller
5275  *
5276  * @rdev: radeon_device pointer
5277  *
5278  * Set the location of vram, gart, and AGP in the GPU's
5279  * physical address space (CIK).
5280  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* the MC must be stopped while its apertures are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	/* unmapped accesses in the system aperture go to the scratch page */
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs vram top (hi 16 bits) and base (lo 16 bits),
	 * both in 16MB (>> 24) units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture is not used on CIK */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5328
5329 /**
5330  * cik_mc_init - initialize the memory controller driver params
5331  *
5332  * @rdev: radeon_device pointer
5333  *
5334  * Look up the amount of vram, vram width, and decide how to place
5335  * vram and gart within the GPU's physical address space (CIK).
5336  * Returns 0 for success.
5337  */
5338 static int cik_mc_init(struct radeon_device *rdev)
5339 {
5340         u32 tmp;
5341         int chansize, numchan;
5342
5343         /* Get VRAM informations */
5344         rdev->mc.vram_is_ddr = true;
5345         tmp = RREG32(MC_ARB_RAMCFG);
5346         if (tmp & CHANSIZE_MASK) {
5347                 chansize = 64;
5348         } else {
5349                 chansize = 32;
5350         }
5351         tmp = RREG32(MC_SHARED_CHMAP);
5352         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5353         case 0:
5354         default:
5355                 numchan = 1;
5356                 break;
5357         case 1:
5358                 numchan = 2;
5359                 break;
5360         case 2:
5361                 numchan = 4;
5362                 break;
5363         case 3:
5364                 numchan = 8;
5365                 break;
5366         case 4:
5367                 numchan = 3;
5368                 break;
5369         case 5:
5370                 numchan = 6;
5371                 break;
5372         case 6:
5373                 numchan = 10;
5374                 break;
5375         case 7:
5376                 numchan = 12;
5377                 break;
5378         case 8:
5379                 numchan = 16;
5380                 break;
5381         }
5382         rdev->mc.vram_width = numchan * chansize;
5383         /* Could aper size report 0 ? */
5384         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5385         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5386         /* size in MB on si */
5387         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5388         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5389         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5390         si_vram_gtt_location(rdev, &rdev->mc);
5391         radeon_update_bandwidth_info(rdev);
5392
5393         return 0;
5394 }
5395
5396 /*
5397  * GART
5398  * VMID 0 is the physical GPU addresses as used by the kernel.
5399  * VMIDs 1-15 are used for userspace clients and are handled
5400  * by the radeon vm/hsa code.
5401  */
5402 /**
5403  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5404  *
5405  * @rdev: radeon_device pointer
5406  *
5407  * Flush the TLB for the VMID 0 page table (CIK).
5408  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache first so any dirty CPU writes reach memory
	 * before the TLBs are invalidated */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only context 0 (VMID0) here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5417
5418 /**
5419  * cik_pcie_gart_enable - gart enable
5420  *
5421  * @rdev: radeon_device pointer
5422  *
5423  * This sets up the TLBs, programs the page tables for VMID0,
5424  * sets up the hw for VMIDs 1-15 which are allocated on
5425  * demand, and sets up the global locations for the LDS, GDS,
5426  * and GPUVM for FSA64 clients (CIK).
5427  * Returns 0 for success, errors for failure.
5428  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: covers the GTT range, faults fall back to the
	 * dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): unnamed registers; purpose not documented here */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* page table bases were captured by cik_pcie_gart_disable() */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* make sure the VM is not bypassed on Kaveri */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	/* program the SH_MEM/SDMA aperture regs for every VMID via SRBM */
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5538
5539 /**
5540  * cik_pcie_gart_disable - gart disable
5541  *
5542  * @rdev: radeon_device pointer
5543  *
5544  * This disables all VM page table (CIK).
5545  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-VMID page table bases so a subsequent
	 * cik_pcie_gart_enable() can restore contexts 1-15 */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* hw no longer references the table; safe to unpin it */
	radeon_gart_table_vram_unpin(rdev);
}
5577
5578 /**
5579  * cik_pcie_gart_fini - vm fini callback
5580  *
5581  * @rdev: radeon_device pointer
5582  *
5583  * Tears down the driver GART/VM setup (CIK).
5584  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable first so the hw stops referencing the table,
	 * then free the table memory and the gart bookkeeping */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5591
5592 /* vm parser */
5593 /**
5594  * cik_ib_parse - vm ib_parse callback
5595  *
5596  * @rdev: radeon_device pointer
5597  * @ib: indirect buffer pointer
5598  *
5599  * CIK uses hw IB checking so this is a nop (CIK).
5600  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* intentionally a nop: CIK relies on hw IB checking */
	return 0;
}
5605
5606 /*
5607  * vm
5608  * VMID 0 is the physical GPU addresses as used by the kernel.
5609  * VMIDs 1-15 are used for userspace clients and are handled
5610  * by the radeon vm/hsa code.
5611  */
5612 /**
5613  * cik_vm_init - cik vm init callback
5614  *
5615  * @rdev: radeon_device pointer
5616  *
5617  * Inits cik specific vm parameters (number of VMs, base of vram for
5618  * VMIDs 1-15) (CIK).
5619  * Returns 0 for success.
5620  */
5621 int cik_vm_init(struct radeon_device *rdev)
5622 {
5623         /*
5624          * number of VMs
5625          * VMID 0 is reserved for System
5626          * radeon graphics/compute will use VMIDs 1-15
5627          */
5628         rdev->vm_manager.nvm = 16;
5629         /* base offset of vram pages */
5630         if (rdev->flags & RADEON_IS_IGP) {
5631                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5632                 tmp <<= 22;
5633                 rdev->vm_manager.vram_base_offset = tmp;
5634         } else
5635                 rdev->vm_manager.vram_base_offset = 0;
5636
5637         return 0;
5638 }
5639
5640 /**
5641  * cik_vm_fini - cik vm fini callback
5642  *
5643  * @rdev: radeon_device pointer
5644  *
5645  * Tear down any asic specific VM setup (CIK).
5646  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty: no asic-specific VM state to tear down */
}
5650
/**
 * cik_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: memory client id of the faulting client (four packed
 *             ASCII characters)
 *
 * Print human readable fault information (CIK).
 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* unpack the four ASCII chars of mc_client, MSB first */
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };

	/* Hawaii uses a different client-id mask in the status register */
	if (rdev->family == CHIP_HAWAII)
		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	else
		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_client, mc_id);
}
5679
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring to emit the flush packets on
 * @vm_id: VMID (0-15) whose TLB entries are flushed
 * @pd_addr: GPU address of the VM's page directory
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the gfx ring has a PFP engine; compute rings use ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update the page table base address for this VMID */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* select the target VMID in SRBM_GFX_CNTL first */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5761
5762 /*
5763  * RLC
5764  * The RLC is a multi-purpose microengine that handles a
5765  * variety of functions, the most important of which is
5766  * the interrupt controller.
5767  */
5768 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5769                                           bool enable)
5770 {
5771         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5772
5773         if (enable)
5774                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5775         else
5776                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5777         WREG32(CP_INT_CNTL_RING0, tmp);
5778 }
5779
5780 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5781 {
5782         u32 tmp;
5783
5784         tmp = RREG32(RLC_LB_CNTL);
5785         if (enable)
5786                 tmp |= LOAD_BALANCE_ENABLE;
5787         else
5788                 tmp &= ~LOAD_BALANCE_ENABLE;
5789         WREG32(RLC_LB_CNTL, tmp);
5790 }
5791
/* Wait (bounded by rdev->usec_timeout per poll loop) for the RLC serdes
 * masters to go idle: first the CU master on every SE/SH combination,
 * then the non-CU masters.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast (all SE/SH) selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5816
/* Write @rlc to RLC_CNTL, skipping the write if it already holds that value */
static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
{
	u32 tmp;

	tmp = RREG32(RLC_CNTL);
	if (tmp != rlc)
		WREG32(RLC_CNTL, rlc);
}
5825
5826 static u32 cik_halt_rlc(struct radeon_device *rdev)
5827 {
5828         u32 data, orig;
5829
5830         orig = data = RREG32(RLC_CNTL);
5831
5832         if (data & RLC_ENABLE) {
5833                 u32 i;
5834
5835                 data &= ~RLC_ENABLE;
5836                 WREG32(RLC_CNTL, data);
5837
5838                 for (i = 0; i < rdev->usec_timeout; i++) {
5839                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5840                                 break;
5841                         udelay(1);
5842                 }
5843
5844                 cik_wait_for_rlc_serdes(rdev);
5845         }
5846
5847         return orig;
5848 }
5849
5850 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5851 {
5852         u32 tmp, i, mask;
5853
5854         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5855         WREG32(RLC_GPR_REG2, tmp);
5856
5857         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5858         for (i = 0; i < rdev->usec_timeout; i++) {
5859                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5860                         break;
5861                 udelay(1);
5862         }
5863
5864         for (i = 0; i < rdev->usec_timeout; i++) {
5865                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5866                         break;
5867                 udelay(1);
5868         }
5869 }
5870
5871 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5872 {
5873         u32 tmp;
5874
5875         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5876         WREG32(RLC_GPR_REG2, tmp);
5877 }
5878
5879 /**
5880  * cik_rlc_stop - stop the RLC ME
5881  *
5882  * @rdev: radeon_device pointer
5883  *
5884  * Halt the RLC ME (MicroEngine) (CIK).
5885  */
5886 static void cik_rlc_stop(struct radeon_device *rdev)
5887 {
5888         WREG32(RLC_CNTL, 0);
5889
5890         cik_enable_gui_idle_interrupt(rdev, false);
5891
5892         cik_wait_for_rlc_serdes(rdev);
5893 }
5894
5895 /**
5896  * cik_rlc_start - start the RLC ME
5897  *
5898  * @rdev: radeon_device pointer
5899  *
5900  * Unhalt the RLC ME (MicroEngine) (CIK).
5901  */
5902 static void cik_rlc_start(struct radeon_device *rdev)
5903 {
5904         WREG32(RLC_CNTL, RLC_ENABLE);
5905
5906         cik_enable_gui_idle_interrupt(rdev, true);
5907
5908         udelay(50);
5909 }
5910
5911 /**
5912  * cik_rlc_resume - setup the RLC hw
5913  *
5914  * @rdev: radeon_device pointer
5915  *
5916  * Initialize the RLC registers, load the ucode,
5917  * and start the RLC (CIK).
5918  * Returns 0 for success, -EINVAL if the ucode is not available.
5919  */
5920 static int cik_rlc_resume(struct radeon_device *rdev)
5921 {
5922         u32 i, size, tmp;
5923
5924         if (!rdev->rlc_fw)
5925                 return -EINVAL;
5926
5927         cik_rlc_stop(rdev);
5928
5929         /* disable CG */
5930         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5931         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5932
5933         si_rlc_reset(rdev);
5934
5935         cik_init_pg(rdev);
5936
5937         cik_init_cg(rdev);
5938
5939         WREG32(RLC_LB_CNTR_INIT, 0);
5940         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5941
5942         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5943         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5944         WREG32(RLC_LB_PARAMS, 0x00600408);
5945         WREG32(RLC_LB_CNTL, 0x80000004);
5946
5947         WREG32(RLC_MC_CNTL, 0);
5948         WREG32(RLC_UCODE_CNTL, 0);
5949
5950         if (rdev->new_fw) {
5951                 const struct rlc_firmware_header_v1_0 *hdr =
5952                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5953                 const __le32 *fw_data = (const __le32 *)
5954                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5955
5956                 radeon_ucode_print_rlc_hdr(&hdr->header);
5957
5958                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5959                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5960                 for (i = 0; i < size; i++)
5961                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5962                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5963         } else {
5964                 const __be32 *fw_data;
5965
5966                 switch (rdev->family) {
5967                 case CHIP_BONAIRE:
5968                 case CHIP_HAWAII:
5969                 default:
5970                         size = BONAIRE_RLC_UCODE_SIZE;
5971                         break;
5972                 case CHIP_KAVERI:
5973                         size = KV_RLC_UCODE_SIZE;
5974                         break;
5975                 case CHIP_KABINI:
5976                         size = KB_RLC_UCODE_SIZE;
5977                         break;
5978                 case CHIP_MULLINS:
5979                         size = ML_RLC_UCODE_SIZE;
5980                         break;
5981                 }
5982
5983                 fw_data = (const __be32 *)rdev->rlc_fw->data;
5984                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5985                 for (i = 0; i < size; i++)
5986                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5987                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5988         }
5989
5990         /* XXX - find out what chips support lbpw */
5991         cik_enable_lbpw(rdev, false);
5992
5993         if (rdev->family == CHIP_BONAIRE)
5994                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5995
5996         cik_rlc_start(rdev);
5997
5998         return 0;
5999 }
6000
/* Enable/disable GFX coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS).  The serdes programming below must happen with the
 * RLC halted; cik_halt_rlc()/cik_update_rlc() bracket that window. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to all SEs/SHs */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the RLC state saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): four back-to-back reads with discarded
		 * results — presumably posting reads to flush/settle the
		 * CB clock-gating logic before disabling; confirm against
		 * the CIK programming docs. */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6036
/* Enable/disable GFX medium-grain clock gating (MGCG) plus the related
 * memory light-sleep (MGLS/CP_LS) and CGTS features, depending on the
 * cg_flags advertised for this ASIC.  Statement order is significant:
 * overrides are programmed before the serdes write, which requires the
 * RLC to be halted. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		/* broadcast serdes write to all SEs/SHs */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		/* restore the RLC state saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* configure CGTS shader-complex gating */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force both override bits on to defeat MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
6116
/* Memory-controller / VM hub registers that all expose the MC_LS_ENABLE
 * and MC_CG_ENABLE control bits; walked by cik_enable_mc_ls() and
 * cik_enable_mc_mgcg() below. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6129
6130 static void cik_enable_mc_ls(struct radeon_device *rdev,
6131                              bool enable)
6132 {
6133         int i;
6134         u32 orig, data;
6135
6136         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6137                 orig = data = RREG32(mc_cg_registers[i]);
6138                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6139                         data |= MC_LS_ENABLE;
6140                 else
6141                         data &= ~MC_LS_ENABLE;
6142                 if (data != orig)
6143                         WREG32(mc_cg_registers[i], data);
6144         }
6145 }
6146
6147 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6148                                bool enable)
6149 {
6150         int i;
6151         u32 orig, data;
6152
6153         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6154                 orig = data = RREG32(mc_cg_registers[i]);
6155                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6156                         data |= MC_CG_ENABLE;
6157                 else
6158                         data &= ~MC_CG_ENABLE;
6159                 if (data != orig)
6160                         WREG32(mc_cg_registers[i], data);
6161         }
6162 }
6163
6164 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6165                                  bool enable)
6166 {
6167         u32 orig, data;
6168
6169         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6170                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6171                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6172         } else {
6173                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6174                 data |= 0xff000000;
6175                 if (data != orig)
6176                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6177
6178                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6179                 data |= 0xff000000;
6180                 if (data != orig)
6181                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6182         }
6183 }
6184
6185 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6186                                  bool enable)
6187 {
6188         u32 orig, data;
6189
6190         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6191                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6192                 data |= 0x100;
6193                 if (orig != data)
6194                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6195
6196                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6197                 data |= 0x100;
6198                 if (orig != data)
6199                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6200         } else {
6201                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6202                 data &= ~0x100;
6203                 if (orig != data)
6204                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6205
6206                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6207                 data &= ~0x100;
6208                 if (orig != data)
6209                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6210         }
6211 }
6212
/* Enable/disable UVD medium-grain clock gating: program the UVD memory
 * gating field and the DCM bit in UVD_CGC_CTRL. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is discarded and the
		 * register is then set to 0xfff unconditionally — possibly
		 * the read itself is required by the hardware, or this was
		 * intended as |= ; confirm against UVD programming docs. */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* disable: clear the memory-gating field and the DCM bit */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6238
6239 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6240                                bool enable)
6241 {
6242         u32 orig, data;
6243
6244         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6245
6246         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6247                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6248                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6249         else
6250                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6251                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6252
6253         if (orig != data)
6254                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6255 }
6256
6257 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6258                                 bool enable)
6259 {
6260         u32 orig, data;
6261
6262         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6263
6264         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6265                 data &= ~CLOCK_GATING_DIS;
6266         else
6267                 data |= CLOCK_GATING_DIS;
6268
6269         if (orig != data)
6270                 WREG32(HDP_HOST_PATH_CNTL, data);
6271 }
6272
6273 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6274                               bool enable)
6275 {
6276         u32 orig, data;
6277
6278         orig = data = RREG32(HDP_MEM_POWER_LS);
6279
6280         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6281                 data |= HDP_LS_ENABLE;
6282         else
6283                 data &= ~HDP_LS_ENABLE;
6284
6285         if (orig != data)
6286                 WREG32(HDP_MEM_POWER_LS, data);
6287 }
6288
/**
 * cik_update_cg - enable/disable clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable (true) or disable (false) clock gating
 *
 * Routes the request to the per-block helpers.  For GFX the MGCG/CGCG
 * ordering differs between enable and disable, and the GUI idle
 * interrupt is masked for the duration of the transition.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating only applies to discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6336
6337 static void cik_init_cg(struct radeon_device *rdev)
6338 {
6339
6340         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6341
6342         if (rdev->has_uvd)
6343                 si_init_uvd_internal_cg(rdev);
6344
6345         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6346                              RADEON_CG_BLOCK_SDMA |
6347                              RADEON_CG_BLOCK_BIF |
6348                              RADEON_CG_BLOCK_UVD |
6349                              RADEON_CG_BLOCK_HDP), true);
6350 }
6351
6352 static void cik_fini_cg(struct radeon_device *rdev)
6353 {
6354         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6355                              RADEON_CG_BLOCK_SDMA |
6356                              RADEON_CG_BLOCK_BIF |
6357                              RADEON_CG_BLOCK_UVD |
6358                              RADEON_CG_BLOCK_HDP), false);
6359
6360         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6361 }
6362
6363 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6364                                           bool enable)
6365 {
6366         u32 data, orig;
6367
6368         orig = data = RREG32(RLC_PG_CNTL);
6369         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6370                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6371         else
6372                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6373         if (orig != data)
6374                 WREG32(RLC_PG_CNTL, data);
6375 }
6376
6377 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6378                                           bool enable)
6379 {
6380         u32 data, orig;
6381
6382         orig = data = RREG32(RLC_PG_CNTL);
6383         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6384                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6385         else
6386                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6387         if (orig != data)
6388                 WREG32(RLC_PG_CNTL, data);
6389 }
6390
6391 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6392 {
6393         u32 data, orig;
6394
6395         orig = data = RREG32(RLC_PG_CNTL);
6396         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6397                 data &= ~DISABLE_CP_PG;
6398         else
6399                 data |= DISABLE_CP_PG;
6400         if (orig != data)
6401                 WREG32(RLC_PG_CNTL, data);
6402 }
6403
6404 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6405 {
6406         u32 data, orig;
6407
6408         orig = data = RREG32(RLC_PG_CNTL);
6409         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6410                 data &= ~DISABLE_GDS_PG;
6411         else
6412                 data |= DISABLE_GDS_PG;
6413         if (orig != data)
6414                 WREG32(RLC_PG_CNTL, data);
6415 }
6416
6417 #define CP_ME_TABLE_SIZE    96
6418 #define CP_ME_TABLE_OFFSET  2048
6419 #define CP_MEC_TABLE_OFFSET 4096
6420
/* Copy the CP jump tables out of the CE/PFP/ME/MEC(/MEC2) firmware images
 * into the RLC's cp_table buffer, which the RLC restores on CP power-up.
 * me index mapping: 0 = CE, 1 = PFP, 2 = ME, 3 = MEC, 4 = MEC2 (KV only). */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri additionally carries a MEC2 image */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			/* new-style firmware: jump-table offset/size come
			 * from the per-image header */
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: fixed table size/offset, payload
			 * stored big-endian */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6504
/* Enable/disable GFX coarse-grain power gating: the GFX_PG_ENABLE bit in
 * RLC_PG_CNTL plus the automatic power-gate control in RLC_AUTO_PG_CTRL. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): read result is discarded — presumably a
		 * posting read to make the disable take effect; confirm. */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6534
6535 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6536 {
6537         u32 mask = 0, tmp, tmp1;
6538         int i;
6539
6540         cik_select_se_sh(rdev, se, sh);
6541         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6542         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6543         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6544
6545         tmp &= 0xffff0000;
6546
6547         tmp |= tmp1;
6548         tmp >>= 16;
6549
6550         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6551                 mask <<= 1;
6552                 mask |= 1;
6553         }
6554
6555         return (~tmp) & mask;
6556 }
6557
6558 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6559 {
6560         u32 i, j, k, active_cu_number = 0;
6561         u32 mask, counter, cu_bitmap;
6562         u32 tmp = 0;
6563
6564         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6565                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6566                         mask = 1;
6567                         cu_bitmap = 0;
6568                         counter = 0;
6569                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6570                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6571                                         if (counter < 2)
6572                                                 cu_bitmap |= mask;
6573                                         counter ++;
6574                                 }
6575                                 mask <<= 1;
6576                         }
6577
6578                         active_cu_number += counter;
6579                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6580                 }
6581         }
6582
6583         WREG32(RLC_PG_AO_CU_MASK, tmp);
6584
6585         tmp = RREG32(RLC_MAX_PG_CU);
6586         tmp &= ~MAX_PU_CU_MASK;
6587         tmp |= MAX_PU_CU(active_cu_number);
6588         WREG32(RLC_MAX_PG_CU, tmp);
6589 }
6590
6591 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6592                                        bool enable)
6593 {
6594         u32 data, orig;
6595
6596         orig = data = RREG32(RLC_PG_CNTL);
6597         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6598                 data |= STATIC_PER_CU_PG_ENABLE;
6599         else
6600                 data &= ~STATIC_PER_CU_PG_ENABLE;
6601         if (orig != data)
6602                 WREG32(RLC_PG_CNTL, data);
6603 }
6604
6605 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6606                                         bool enable)
6607 {
6608         u32 data, orig;
6609
6610         orig = data = RREG32(RLC_PG_CNTL);
6611         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6612                 data |= DYN_PER_CU_PG_ENABLE;
6613         else
6614                 data &= ~DYN_PER_CU_PG_ENABLE;
6615         if (orig != data)
6616                 WREG32(RLC_PG_CNTL, data);
6617 }
6618
6619 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6620 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6621
/* One-time GFX power-gating setup: publish the clear-state descriptor and
 * save/restore register list into the RLC's GPM scratch space, point the
 * RLC at the save/restore and CP-table buffers, and tune the auto-PG
 * poll/delay parameters. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* write the clear-state descriptor (address hi/lo + size) */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffers are 256-byte aligned, hence the >> 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6670
/**
 * cik_update_gfx_pg - enable/disable all gfx powergating features
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable powergating
 *
 * Toggle coarse-grain, static per-CU and dynamic per-CU gfx
 * powergating together (CIK).  Each helper additionally gates on the
 * corresponding rdev->pg_flags capability bit before enabling.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
        cik_enable_gfx_cgpg(rdev, enable);
        cik_enable_gfx_static_mgpg(rdev, enable);
        cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6677
6678 u32 cik_get_csb_size(struct radeon_device *rdev)
6679 {
6680         u32 count = 0;
6681         const struct cs_section_def *sect = NULL;
6682         const struct cs_extent_def *ext = NULL;
6683
6684         if (rdev->rlc.cs_data == NULL)
6685                 return 0;
6686
6687         /* begin clear state */
6688         count += 2;
6689         /* context control state */
6690         count += 3;
6691
6692         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6693                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6694                         if (sect->id == SECT_CONTEXT)
6695                                 count += 2 + ext->reg_count;
6696                         else
6697                                 return 0;
6698                 }
6699         }
6700         /* pa_sc_raster_config/pa_sc_raster_config1 */
6701         count += 4;
6702         /* end clear state */
6703         count += 2;
6704         /* clear state */
6705         count += 2;
6706
6707         return count;
6708 }
6709
6710 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6711 {
6712         u32 count = 0, i;
6713         const struct cs_section_def *sect = NULL;
6714         const struct cs_extent_def *ext = NULL;
6715
6716         if (rdev->rlc.cs_data == NULL)
6717                 return;
6718         if (buffer == NULL)
6719                 return;
6720
6721         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6722         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6723
6724         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6725         buffer[count++] = cpu_to_le32(0x80000000);
6726         buffer[count++] = cpu_to_le32(0x80000000);
6727
6728         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6729                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6730                         if (sect->id == SECT_CONTEXT) {
6731                                 buffer[count++] =
6732                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6733                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6734                                 for (i = 0; i < ext->reg_count; i++)
6735                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6736                         } else {
6737                                 return;
6738                         }
6739                 }
6740         }
6741
6742         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6743         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6744         switch (rdev->family) {
6745         case CHIP_BONAIRE:
6746                 buffer[count++] = cpu_to_le32(0x16000012);
6747                 buffer[count++] = cpu_to_le32(0x00000000);
6748                 break;
6749         case CHIP_KAVERI:
6750                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6751                 buffer[count++] = cpu_to_le32(0x00000000);
6752                 break;
6753         case CHIP_KABINI:
6754         case CHIP_MULLINS:
6755                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6756                 buffer[count++] = cpu_to_le32(0x00000000);
6757                 break;
6758         case CHIP_HAWAII:
6759                 buffer[count++] = cpu_to_le32(0x3a00161a);
6760                 buffer[count++] = cpu_to_le32(0x0000002e);
6761                 break;
6762         default:
6763                 buffer[count++] = cpu_to_le32(0x00000000);
6764                 buffer[count++] = cpu_to_le32(0x00000000);
6765                 break;
6766         }
6767
6768         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6769         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6770
6771         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6772         buffer[count++] = cpu_to_le32(0);
6773 }
6774
/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * Set up all powergating features supported per rdev->pg_flags:
 * SCK slowdown, then (if gfx PG is supported) the coarse-grain gfx
 * PG state plus CP and GDS powergating, and finally the always-on
 * CU mask and the combined gfx PG enables (CIK).
 * No-op when pg_flags is zero.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_enable_sck_slowdown_on_pu(rdev, true);
                cik_enable_sck_slowdown_on_pd(rdev, true);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        cik_init_gfx_cgpg(rdev);
                        cik_enable_cp_pg(rdev, true);
                        cik_enable_gds_pg(rdev, true);
                }
                cik_init_ao_cu_mask(rdev);
                cik_update_gfx_pg(rdev, true);
        }
}
6789
/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disable the powergating features enabled by cik_init_pg(), in
 * roughly reverse order: gfx PG first, then CP and GDS PG (CIK).
 * No-op when pg_flags is zero.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_update_gfx_pg(rdev, false);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        cik_enable_cp_pg(rdev, false);
                        cik_enable_gds_pg(rdev, false);
                }
        }
}
6800
6801 /*
6802  * Interrupts
6803  * Starting with r6xx, interrupts are handled via a ring buffer.
6804  * Ring buffers are areas of GPU accessible memory that the GPU
6805  * writes interrupt vectors into and the host reads vectors out of.
6806  * There is a rptr (read pointer) that determines where the
6807  * host is currently reading, and a wptr (write pointer)
6808  * which determines where the GPU has written.  When the
6809  * pointers are equal, the ring is idle.  When the GPU
6810  * writes vectors to the ring buffer, it increments the
6811  * wptr.  When there is an interrupt, the host then starts
 * fetching vectors and processing them until the pointers are
6813  * equal again at which point it updates the rptr.
6814  */
6815
6816 /**
6817  * cik_enable_interrupts - Enable the interrupt ring buffer
6818  *
6819  * @rdev: radeon_device pointer
6820  *
6821  * Enable the interrupt ring buffer (CIK).
6822  */
6823 static void cik_enable_interrupts(struct radeon_device *rdev)
6824 {
6825         u32 ih_cntl = RREG32(IH_CNTL);
6826         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6827
6828         ih_cntl |= ENABLE_INTR;
6829         ih_rb_cntl |= IH_RB_ENABLE;
6830         WREG32(IH_CNTL, ih_cntl);
6831         WREG32(IH_RB_CNTL, ih_rb_cntl);
6832         rdev->ih.enabled = true;
6833 }
6834
6835 /**
6836  * cik_disable_interrupts - Disable the interrupt ring buffer
6837  *
6838  * @rdev: radeon_device pointer
6839  *
6840  * Disable the interrupt ring buffer (CIK).
6841  */
6842 static void cik_disable_interrupts(struct radeon_device *rdev)
6843 {
6844         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6845         u32 ih_cntl = RREG32(IH_CNTL);
6846
6847         ih_rb_cntl &= ~IH_RB_ENABLE;
6848         ih_cntl &= ~ENABLE_INTR;
6849         WREG32(IH_RB_CNTL, ih_rb_cntl);
6850         WREG32(IH_CNTL, ih_cntl);
6851         /* set rptr, wptr to 0 */
6852         WREG32(IH_RB_RPTR, 0);
6853         WREG32(IH_RB_WPTR, 0);
6854         rdev->ih.enabled = false;
6855         rdev->ih.rptr = 0;
6856 }
6857
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK): gfx ring,
 * both SDMA engines, all eight compute queue pipes, GRBM, SRBM, the
 * per-crtc vblank/vline and pageflip interrupts, DAC autodetect and
 * the six digital hotplug lines.  Bits that configure rather than
 * enable (hpd polarity, gfx context busy/empty enables) are preserved.
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
        u32 tmp;

        /* gfx ring: keep only the context busy/empty enables */
        tmp = RREG32(CP_INT_CNTL_RING0) &
                (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        WREG32(CP_INT_CNTL_RING0, tmp);
        /* sdma: clear the trap enable on both engines */
        tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
        tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
        /* compute queues: 2 MEs x 4 pipes, all fully disabled */
        WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
        /* grbm */
        WREG32(GRBM_INT_CNTL, 0);
        /* SRBM */
        WREG32(SRBM_INT_CNTL, 0);
        /* vline/vblank, etc. */
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        if (rdev->num_crtc >= 4) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }
        /* pflip */
        if (rdev->num_crtc >= 2) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 4) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }

        /* dac hotplug */
        WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

        /* digital hotplug: drop the enables but keep the polarity bit */
        tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD1_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD2_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD3_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD4_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD5_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6934
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
        int ret = 0;
        int rb_bufsz;
        u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

        /* allocate ring */
        ret = r600_ih_ring_alloc(rdev);
        if (ret)
                return ret;

        /* disable irqs */
        cik_disable_interrupts(rdev);

        /* init rlc */
        ret = cik_rlc_resume(rdev);
        if (ret) {
                /* tear down the IH ring allocated above */
                r600_ih_ring_fini(rdev);
                return ret;
        }

        /* setup interrupt control */
        /* set dummy read address to dummy page address */
        WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
        interrupt_cntl = RREG32(INTERRUPT_CNTL);
        /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
         * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
         */
        interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
        /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
        interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
        WREG32(INTERRUPT_CNTL, interrupt_cntl);

        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
        /* ring size field is log2 of the size in dwords */
        rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
                      IH_WPTR_OVERFLOW_CLEAR |
                      (rb_bufsz << 1));

        if (rdev->wb.enabled)
                ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

        /* set the writeback address whether it's enabled or not */
        WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

        WREG32(IH_RB_CNTL, ih_rb_cntl);

        /* set rptr, wptr to 0 */
        WREG32(IH_RB_RPTR, 0);
        WREG32(IH_RB_WPTR, 0);

        /* Default settings for IH_CNTL (disabled at first) */
        ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
        /* RPTR_REARM only works if msi's are enabled */
        if (rdev->msi_enabled)
                ih_cntl |= RPTR_REARM;
        WREG32(IH_CNTL, ih_cntl);

        /* force the active interrupt state to all disabled */
        cik_disable_interrupt_state(rdev);

        /* bus mastering must be on for MSIs/irq delivery */
        pci_set_master(rdev->pdev);

        /* enable irqs */
        cik_enable_interrupts(rdev);

        return ret;
}
7016
7017 /**
7018  * cik_irq_set - enable/disable interrupt sources
7019  *
7020  * @rdev: radeon_device pointer
7021  *
7022  * Enable interrupt sources on the GPU (vblanks, hpd,
7023  * etc.) (CIK).
7024  * Returns 0 for success, errors for failure.
7025  */
7026 int cik_irq_set(struct radeon_device *rdev)
7027 {
7028         u32 cp_int_cntl;
7029         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7030         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7031         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7032         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7033         u32 grbm_int_cntl = 0;
7034         u32 dma_cntl, dma_cntl1;
7035
7036         if (!rdev->irq.installed) {
7037                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7038                 return -EINVAL;
7039         }
7040         /* don't enable anything if the ih is disabled */
7041         if (!rdev->ih.enabled) {
7042                 cik_disable_interrupts(rdev);
7043                 /* force the active interrupt state to all disabled */
7044                 cik_disable_interrupt_state(rdev);
7045                 return 0;
7046         }
7047
7048         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7049                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7050         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7051
7052         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7053         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7054         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7055         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7056         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7057         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7058
7059         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7060         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7061
7062         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7063         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7064         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7065         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7066         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7067         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7068         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7069         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7070
7071         /* enable CP interrupts on all rings */
7072         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7073                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7074                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7075         }
7076         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7077                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7078                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7079                 if (ring->me == 1) {
7080                         switch (ring->pipe) {
7081                         case 0:
7082                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7083                                 break;
7084                         case 1:
7085                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7086                                 break;
7087                         case 2:
7088                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7089                                 break;
7090                         case 3:
7091                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7092                                 break;
7093                         default:
7094                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7095                                 break;
7096                         }
7097                 } else if (ring->me == 2) {
7098                         switch (ring->pipe) {
7099                         case 0:
7100                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7101                                 break;
7102                         case 1:
7103                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7104                                 break;
7105                         case 2:
7106                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7107                                 break;
7108                         case 3:
7109                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7110                                 break;
7111                         default:
7112                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7113                                 break;
7114                         }
7115                 } else {
7116                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7117                 }
7118         }
7119         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7120                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7121                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7122                 if (ring->me == 1) {
7123                         switch (ring->pipe) {
7124                         case 0:
7125                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7126                                 break;
7127                         case 1:
7128                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7129                                 break;
7130                         case 2:
7131                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7132                                 break;
7133                         case 3:
7134                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7135                                 break;
7136                         default:
7137                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7138                                 break;
7139                         }
7140                 } else if (ring->me == 2) {
7141                         switch (ring->pipe) {
7142                         case 0:
7143                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7144                                 break;
7145                         case 1:
7146                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7147                                 break;
7148                         case 2:
7149                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7150                                 break;
7151                         case 3:
7152                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7153                                 break;
7154                         default:
7155                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7156                                 break;
7157                         }
7158                 } else {
7159                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7160                 }
7161         }
7162
7163         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7164                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7165                 dma_cntl |= TRAP_ENABLE;
7166         }
7167
7168         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7169                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7170                 dma_cntl1 |= TRAP_ENABLE;
7171         }
7172
7173         if (rdev->irq.crtc_vblank_int[0] ||
7174             atomic_read(&rdev->irq.pflip[0])) {
7175                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7176                 crtc1 |= VBLANK_INTERRUPT_MASK;
7177         }
7178         if (rdev->irq.crtc_vblank_int[1] ||
7179             atomic_read(&rdev->irq.pflip[1])) {
7180                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7181                 crtc2 |= VBLANK_INTERRUPT_MASK;
7182         }
7183         if (rdev->irq.crtc_vblank_int[2] ||
7184             atomic_read(&rdev->irq.pflip[2])) {
7185                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7186                 crtc3 |= VBLANK_INTERRUPT_MASK;
7187         }
7188         if (rdev->irq.crtc_vblank_int[3] ||
7189             atomic_read(&rdev->irq.pflip[3])) {
7190                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7191                 crtc4 |= VBLANK_INTERRUPT_MASK;
7192         }
7193         if (rdev->irq.crtc_vblank_int[4] ||
7194             atomic_read(&rdev->irq.pflip[4])) {
7195                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7196                 crtc5 |= VBLANK_INTERRUPT_MASK;
7197         }
7198         if (rdev->irq.crtc_vblank_int[5] ||
7199             atomic_read(&rdev->irq.pflip[5])) {
7200                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7201                 crtc6 |= VBLANK_INTERRUPT_MASK;
7202         }
7203         if (rdev->irq.hpd[0]) {
7204                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7205                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7206         }
7207         if (rdev->irq.hpd[1]) {
7208                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7209                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7210         }
7211         if (rdev->irq.hpd[2]) {
7212                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7213                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7214         }
7215         if (rdev->irq.hpd[3]) {
7216                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7217                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7218         }
7219         if (rdev->irq.hpd[4]) {
7220                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7221                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7222         }
7223         if (rdev->irq.hpd[5]) {
7224                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7225                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226         }
7227
7228         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7229
7230         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7231         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7232
7233         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7234         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7235         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7236         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7237         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7238         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7239         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7240         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7241
7242         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7243
7244         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7245         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7246         if (rdev->num_crtc >= 4) {
7247                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7248                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7249         }
7250         if (rdev->num_crtc >= 6) {
7251                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7252                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7253         }
7254
7255         if (rdev->num_crtc >= 2) {
7256                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7257                        GRPH_PFLIP_INT_MASK);
7258                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7259                        GRPH_PFLIP_INT_MASK);
7260         }
7261         if (rdev->num_crtc >= 4) {
7262                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7263                        GRPH_PFLIP_INT_MASK);
7264                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7265                        GRPH_PFLIP_INT_MASK);
7266         }
7267         if (rdev->num_crtc >= 6) {
7268                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7269                        GRPH_PFLIP_INT_MASK);
7270                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7271                        GRPH_PFLIP_INT_MASK);
7272         }
7273
7274         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7275         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7276         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7277         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7278         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7279         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7280
7281         /* posting read */
7282         RREG32(SRBM_STATUS);
7283
7284         return 0;
7285 }
7286
7287 /**
7288  * cik_irq_ack - ack interrupt sources
7289  *
7290  * @rdev: radeon_device pointer
7291  *
7292  * Ack interrupt sources on the GPU (vblanks, hpd,
7293  * etc.) (CIK).  Certain interrupts sources are sw
7294  * generated and do not require an explicit ack.
7295  */
7296 static inline void cik_irq_ack(struct radeon_device *rdev)
7297 {
7298         u32 tmp;
7299
7300         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7301         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7302         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7303         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7304         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7305         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7306         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7307
7308         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7309                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7310         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7311                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7312         if (rdev->num_crtc >= 4) {
7313                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7314                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7315                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7316                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7317         }
7318         if (rdev->num_crtc >= 6) {
7319                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7320                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7321                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7322                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7323         }
7324
7325         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7326                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7327                        GRPH_PFLIP_INT_CLEAR);
7328         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7329                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7330                        GRPH_PFLIP_INT_CLEAR);
7331         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7332                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7333         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7334                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7335         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7336                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7337         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7338                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7339
7340         if (rdev->num_crtc >= 4) {
7341                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7342                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7343                                GRPH_PFLIP_INT_CLEAR);
7344                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7345                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7346                                GRPH_PFLIP_INT_CLEAR);
7347                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7348                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7349                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7350                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7351                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7352                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7353                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7354                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7355         }
7356
7357         if (rdev->num_crtc >= 6) {
7358                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7359                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7360                                GRPH_PFLIP_INT_CLEAR);
7361                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7362                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7363                                GRPH_PFLIP_INT_CLEAR);
7364                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7365                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7366                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7367                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7368                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7369                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7370                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7371                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7372         }
7373
7374         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7375                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7376                 tmp |= DC_HPDx_INT_ACK;
7377                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7378         }
7379         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7380                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7381                 tmp |= DC_HPDx_INT_ACK;
7382                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7383         }
7384         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7385                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7386                 tmp |= DC_HPDx_INT_ACK;
7387                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7388         }
7389         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7390                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7391                 tmp |= DC_HPDx_INT_ACK;
7392                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7393         }
7394         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7395                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7396                 tmp |= DC_HPDx_INT_ACK;
7397                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7398         }
7399         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7400                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7401                 tmp |= DC_HPDx_INT_ACK;
7402                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7403         }
7404         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7405                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7406                 tmp |= DC_HPDx_RX_INT_ACK;
7407                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7408         }
7409         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7410                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7411                 tmp |= DC_HPDx_RX_INT_ACK;
7412                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7413         }
7414         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7415                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7416                 tmp |= DC_HPDx_RX_INT_ACK;
7417                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7418         }
7419         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7420                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7421                 tmp |= DC_HPDx_RX_INT_ACK;
7422                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7423         }
7424         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7425                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7426                 tmp |= DC_HPDx_RX_INT_ACK;
7427                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7428         }
7429         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7430                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7431                 tmp |= DC_HPDx_RX_INT_ACK;
7432                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7433         }
7434 }
7435
7436 /**
7437  * cik_irq_disable - disable interrupts
7438  *
7439  * @rdev: radeon_device pointer
7440  *
7441  * Disable interrupts on the hw (CIK).
7442  */
7443 static void cik_irq_disable(struct radeon_device *rdev)
7444 {
7445         cik_disable_interrupts(rdev);
7446         /* Wait and acknowledge irq */
7447         mdelay(1);
7448         cik_irq_ack(rdev);
7449         cik_disable_interrupt_state(rdev);
7450 }
7451
7452 /**
7453  * cik_irq_disable - disable interrupts for suspend
7454  *
7455  * @rdev: radeon_device pointer
7456  *
7457  * Disable interrupts and stop the RLC (CIK).
7458  * Used for suspend.
7459  */
7460 static void cik_irq_suspend(struct radeon_device *rdev)
7461 {
7462         cik_irq_disable(rdev);
7463         cik_rlc_stop(rdev);
7464 }
7465
7466 /**
7467  * cik_irq_fini - tear down interrupt support
7468  *
7469  * @rdev: radeon_device pointer
7470  *
7471  * Disable interrupts on the hw and free the IH ring
7472  * buffer (CIK).
7473  * Used for driver unload.
7474  */
7475 static void cik_irq_fini(struct radeon_device *rdev)
7476 {
7477         cik_irq_suspend(rdev);
7478         r600_ih_ring_fini(rdev);
7479 }
7480
7481 /**
7482  * cik_get_ih_wptr - get the IH ring buffer wptr
7483  *
7484  * @rdev: radeon_device pointer
7485  *
7486  * Get the IH ring buffer wptr from either the register
7487  * or the writeback memory buffer (CIK).  Also check for
7488  * ring buffer overflow and deal with it.
7489  * Used by cik_irq_process().
7490  * Returns the value of the wptr.
7491  */
7492 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7493 {
7494         u32 wptr, tmp;
7495
7496         if (rdev->wb.enabled)
7497                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7498         else
7499                 wptr = RREG32(IH_RB_WPTR);
7500
7501         if (wptr & RB_OVERFLOW) {
7502                 wptr &= ~RB_OVERFLOW;
7503                 /* When a ring buffer overflow happen start parsing interrupt
7504                  * from the last not overwritten vector (wptr + 16). Hopefully
7505                  * this should allow us to catchup.
7506                  */
7507                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7508                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7509                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7510                 tmp = RREG32(IH_RB_CNTL);
7511                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7512                 WREG32(IH_RB_CNTL, tmp);
7513         }
7514         return (wptr & rdev->ih.ptr_mask);
7515 }
7516
7517 /*        CIK IV Ring
7518  * Each IV ring entry is 128 bits:
7519  * [7:0]    - interrupt source id
7520  * [31:8]   - reserved
7521  * [59:32]  - interrupt source data
7522  * [63:60]  - reserved
7523  * [71:64]  - RINGID
7524  *            CP:
7525  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7526  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7527  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7528  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7529  *            PIPE_ID - ME0 0=3D
7530  *                    - ME1&2 compute dispatcher (4 pipes each)
7531  *            SDMA:
7532  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7533  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7534  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7535  * [79:72]  - VMID
7536  * [95:80]  - PASID
7537  * [127:96] - reserved
7538  */
7539 /**
7540  * cik_irq_process - interrupt handler
7541  *
7542  * @rdev: radeon_device pointer
7543  *
 * Interrupt handler (CIK).  Walk the IH ring,
7545  * ack interrupts and schedule work to handle
7546  * interrupt events.
7547  * Returns irq process return code.
7548  */
7549 int cik_irq_process(struct radeon_device *rdev)
7550 {
7551         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7552         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7553         u32 wptr;
7554         u32 rptr;
7555         u32 src_id, src_data, ring_id;
7556         u8 me_id, pipe_id, queue_id;
7557         u32 ring_index;
7558         bool queue_hotplug = false;
7559         bool queue_dp = false;
7560         bool queue_reset = false;
7561         u32 addr, status, mc_client;
7562         bool queue_thermal = false;
7563
7564         if (!rdev->ih.enabled || rdev->shutdown)
7565                 return IRQ_NONE;
7566
7567         wptr = cik_get_ih_wptr(rdev);
7568
7569 restart_ih:
7570         /* is somebody else already processing irqs? */
7571         if (atomic_xchg(&rdev->ih.lock, 1))
7572                 return IRQ_NONE;
7573
7574         rptr = rdev->ih.rptr;
7575         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7576
7577         /* Order reading of wptr vs. reading of IH ring data */
7578         rmb();
7579
7580         /* display interrupts */
7581         cik_irq_ack(rdev);
7582
7583         while (rptr != wptr) {
7584                 /* wptr/rptr are in bytes! */
7585                 ring_index = rptr / 4;
7586
7587                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7588                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7589                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7590
7591                 switch (src_id) {
7592                 case 1: /* D1 vblank/vline */
7593                         switch (src_data) {
7594                         case 0: /* D1 vblank */
7595                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7596                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7597
7598                                 if (rdev->irq.crtc_vblank_int[0]) {
7599                                         drm_handle_vblank(rdev->ddev, 0);
7600                                         rdev->pm.vblank_sync = true;
7601                                         wake_up(&rdev->irq.vblank_queue);
7602                                 }
7603                                 if (atomic_read(&rdev->irq.pflip[0]))
7604                                         radeon_crtc_handle_vblank(rdev, 0);
7605                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7606                                 DRM_DEBUG("IH: D1 vblank\n");
7607
7608                                 break;
7609                         case 1: /* D1 vline */
7610                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7611                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7612
7613                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7614                                 DRM_DEBUG("IH: D1 vline\n");
7615
7616                                 break;
7617                         default:
7618                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7619                                 break;
7620                         }
7621                         break;
7622                 case 2: /* D2 vblank/vline */
7623                         switch (src_data) {
7624                         case 0: /* D2 vblank */
7625                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7626                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7627
7628                                 if (rdev->irq.crtc_vblank_int[1]) {
7629                                         drm_handle_vblank(rdev->ddev, 1);
7630                                         rdev->pm.vblank_sync = true;
7631                                         wake_up(&rdev->irq.vblank_queue);
7632                                 }
7633                                 if (atomic_read(&rdev->irq.pflip[1]))
7634                                         radeon_crtc_handle_vblank(rdev, 1);
7635                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7636                                 DRM_DEBUG("IH: D2 vblank\n");
7637
7638                                 break;
7639                         case 1: /* D2 vline */
7640                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7641                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7642
7643                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7644                                 DRM_DEBUG("IH: D2 vline\n");
7645
7646                                 break;
7647                         default:
7648                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7649                                 break;
7650                         }
7651                         break;
7652                 case 3: /* D3 vblank/vline */
7653                         switch (src_data) {
7654                         case 0: /* D3 vblank */
7655                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7656                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7657
7658                                 if (rdev->irq.crtc_vblank_int[2]) {
7659                                         drm_handle_vblank(rdev->ddev, 2);
7660                                         rdev->pm.vblank_sync = true;
7661                                         wake_up(&rdev->irq.vblank_queue);
7662                                 }
7663                                 if (atomic_read(&rdev->irq.pflip[2]))
7664                                         radeon_crtc_handle_vblank(rdev, 2);
7665                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7666                                 DRM_DEBUG("IH: D3 vblank\n");
7667
7668                                 break;
7669                         case 1: /* D3 vline */
7670                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7671                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7672
7673                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7674                                 DRM_DEBUG("IH: D3 vline\n");
7675
7676                                 break;
7677                         default:
7678                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7679                                 break;
7680                         }
7681                         break;
7682                 case 4: /* D4 vblank/vline */
7683                         switch (src_data) {
7684                         case 0: /* D4 vblank */
7685                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7686                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7687
7688                                 if (rdev->irq.crtc_vblank_int[3]) {
7689                                         drm_handle_vblank(rdev->ddev, 3);
7690                                         rdev->pm.vblank_sync = true;
7691                                         wake_up(&rdev->irq.vblank_queue);
7692                                 }
7693                                 if (atomic_read(&rdev->irq.pflip[3]))
7694                                         radeon_crtc_handle_vblank(rdev, 3);
7695                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7696                                 DRM_DEBUG("IH: D4 vblank\n");
7697
7698                                 break;
7699                         case 1: /* D4 vline */
7700                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7701                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7702
7703                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7704                                 DRM_DEBUG("IH: D4 vline\n");
7705
7706                                 break;
7707                         default:
7708                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7709                                 break;
7710                         }
7711                         break;
7712                 case 5: /* D5 vblank/vline */
7713                         switch (src_data) {
7714                         case 0: /* D5 vblank */
7715                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7716                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7717
7718                                 if (rdev->irq.crtc_vblank_int[4]) {
7719                                         drm_handle_vblank(rdev->ddev, 4);
7720                                         rdev->pm.vblank_sync = true;
7721                                         wake_up(&rdev->irq.vblank_queue);
7722                                 }
7723                                 if (atomic_read(&rdev->irq.pflip[4]))
7724                                         radeon_crtc_handle_vblank(rdev, 4);
7725                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7726                                 DRM_DEBUG("IH: D5 vblank\n");
7727
7728                                 break;
7729                         case 1: /* D5 vline */
7730                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7731                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7732
7733                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7734                                 DRM_DEBUG("IH: D5 vline\n");
7735
7736                                 break;
7737                         default:
7738                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7739                                 break;
7740                         }
7741                         break;
7742                 case 6: /* D6 vblank/vline */
7743                         switch (src_data) {
7744                         case 0: /* D6 vblank */
7745                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7746                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7747
7748                                 if (rdev->irq.crtc_vblank_int[5]) {
7749                                         drm_handle_vblank(rdev->ddev, 5);
7750                                         rdev->pm.vblank_sync = true;
7751                                         wake_up(&rdev->irq.vblank_queue);
7752                                 }
7753                                 if (atomic_read(&rdev->irq.pflip[5]))
7754                                         radeon_crtc_handle_vblank(rdev, 5);
7755                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7756                                 DRM_DEBUG("IH: D6 vblank\n");
7757
7758                                 break;
7759                         case 1: /* D6 vline */
7760                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7761                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7762
7763                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7764                                 DRM_DEBUG("IH: D6 vline\n");
7765
7766                                 break;
7767                         default:
7768                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7769                                 break;
7770                         }
7771                         break;
7772                 case 8: /* D1 page flip */
7773                 case 10: /* D2 page flip */
7774                 case 12: /* D3 page flip */
7775                 case 14: /* D4 page flip */
7776                 case 16: /* D5 page flip */
7777                 case 18: /* D6 page flip */
7778                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7779                         if (radeon_use_pflipirq > 0)
7780                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7781                         break;
7782                 case 42: /* HPD hotplug */
7783                         switch (src_data) {
7784                         case 0:
7785                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7786                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7787
7788                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7789                                 queue_hotplug = true;
7790                                 DRM_DEBUG("IH: HPD1\n");
7791
7792                                 break;
7793                         case 1:
7794                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7795                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7796
7797                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7798                                 queue_hotplug = true;
7799                                 DRM_DEBUG("IH: HPD2\n");
7800
7801                                 break;
7802                         case 2:
7803                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7804                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7805
7806                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7807                                 queue_hotplug = true;
7808                                 DRM_DEBUG("IH: HPD3\n");
7809
7810                                 break;
7811                         case 3:
7812                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7813                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7814
7815                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7816                                 queue_hotplug = true;
7817                                 DRM_DEBUG("IH: HPD4\n");
7818
7819                                 break;
7820                         case 4:
7821                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7822                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7823
7824                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7825                                 queue_hotplug = true;
7826                                 DRM_DEBUG("IH: HPD5\n");
7827
7828                                 break;
7829                         case 5:
7830                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7831                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7832
7833                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7834                                 queue_hotplug = true;
7835                                 DRM_DEBUG("IH: HPD6\n");
7836
7837                                 break;
7838                         case 6:
7839                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7840                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7841
7842                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7843                                 queue_dp = true;
7844                                 DRM_DEBUG("IH: HPD_RX 1\n");
7845
7846                                 break;
7847                         case 7:
7848                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7849                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7850
7851                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7852                                 queue_dp = true;
7853                                 DRM_DEBUG("IH: HPD_RX 2\n");
7854
7855                                 break;
7856                         case 8:
7857                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7858                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7859
7860                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7861                                 queue_dp = true;
7862                                 DRM_DEBUG("IH: HPD_RX 3\n");
7863
7864                                 break;
7865                         case 9:
7866                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7867                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7868
7869                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7870                                 queue_dp = true;
7871                                 DRM_DEBUG("IH: HPD_RX 4\n");
7872
7873                                 break;
7874                         case 10:
7875                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7876                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7877
7878                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7879                                 queue_dp = true;
7880                                 DRM_DEBUG("IH: HPD_RX 5\n");
7881
7882                                 break;
7883                         case 11:
7884                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7885                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7886
7887                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7888                                 queue_dp = true;
7889                                 DRM_DEBUG("IH: HPD_RX 6\n");
7890
7891                                 break;
7892                         default:
7893                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7894                                 break;
7895                         }
7896                         break;
7897                 case 96:
7898                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7899                         WREG32(SRBM_INT_ACK, 0x1);
7900                         break;
7901                 case 124: /* UVD */
7902                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7903                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7904                         break;
7905                 case 146:
7906                 case 147:
7907                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7908                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7909                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7910                         /* reset addr and status */
7911                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7912                         if (addr == 0x0 && status == 0x0)
7913                                 break;
7914                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7915                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7916                                 addr);
7917                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7918                                 status);
7919                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7920                         break;
7921                 case 167: /* VCE */
7922                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7923                         switch (src_data) {
7924                         case 0:
7925                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7926                                 break;
7927                         case 1:
7928                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7929                                 break;
7930                         default:
7931                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7932                                 break;
7933                         }
7934                         break;
7935                 case 176: /* GFX RB CP_INT */
7936                 case 177: /* GFX IB CP_INT */
7937                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7938                         break;
7939                 case 181: /* CP EOP event */
7940                         DRM_DEBUG("IH: CP EOP\n");
7941                         /* XXX check the bitfield order! */
7942                         me_id = (ring_id & 0x60) >> 5;
7943                         pipe_id = (ring_id & 0x18) >> 3;
7944                         queue_id = (ring_id & 0x7) >> 0;
7945                         switch (me_id) {
7946                         case 0:
7947                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7948                                 break;
7949                         case 1:
7950                         case 2:
7951                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7952                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7953                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7954                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7955                                 break;
7956                         }
7957                         break;
7958                 case 184: /* CP Privileged reg access */
7959                         DRM_ERROR("Illegal register access in command stream\n");
7960                         /* XXX check the bitfield order! */
7961                         me_id = (ring_id & 0x60) >> 5;
7962                         pipe_id = (ring_id & 0x18) >> 3;
7963                         queue_id = (ring_id & 0x7) >> 0;
7964                         switch (me_id) {
7965                         case 0:
7966                                 /* This results in a full GPU reset, but all we need to do is soft
7967                                  * reset the CP for gfx
7968                                  */
7969                                 queue_reset = true;
7970                                 break;
7971                         case 1:
7972                                 /* XXX compute */
7973                                 queue_reset = true;
7974                                 break;
7975                         case 2:
7976                                 /* XXX compute */
7977                                 queue_reset = true;
7978                                 break;
7979                         }
7980                         break;
7981                 case 185: /* CP Privileged inst */
7982                         DRM_ERROR("Illegal instruction in command stream\n");
7983                         /* XXX check the bitfield order! */
7984                         me_id = (ring_id & 0x60) >> 5;
7985                         pipe_id = (ring_id & 0x18) >> 3;
7986                         queue_id = (ring_id & 0x7) >> 0;
7987                         switch (me_id) {
7988                         case 0:
7989                                 /* This results in a full GPU reset, but all we need to do is soft
7990                                  * reset the CP for gfx
7991                                  */
7992                                 queue_reset = true;
7993                                 break;
7994                         case 1:
7995                                 /* XXX compute */
7996                                 queue_reset = true;
7997                                 break;
7998                         case 2:
7999                                 /* XXX compute */
8000                                 queue_reset = true;
8001                                 break;
8002                         }
8003                         break;
8004                 case 224: /* SDMA trap event */
8005                         /* XXX check the bitfield order! */
8006                         me_id = (ring_id & 0x3) >> 0;
8007                         queue_id = (ring_id & 0xc) >> 2;
8008                         DRM_DEBUG("IH: SDMA trap\n");
8009                         switch (me_id) {
8010                         case 0:
8011                                 switch (queue_id) {
8012                                 case 0:
8013                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8014                                         break;
8015                                 case 1:
8016                                         /* XXX compute */
8017                                         break;
8018                                 case 2:
8019                                         /* XXX compute */
8020                                         break;
8021                                 }
8022                                 break;
8023                         case 1:
8024                                 switch (queue_id) {
8025                                 case 0:
8026                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8027                                         break;
8028                                 case 1:
8029                                         /* XXX compute */
8030                                         break;
8031                                 case 2:
8032                                         /* XXX compute */
8033                                         break;
8034                                 }
8035                                 break;
8036                         }
8037                         break;
8038                 case 230: /* thermal low to high */
8039                         DRM_DEBUG("IH: thermal low to high\n");
8040                         rdev->pm.dpm.thermal.high_to_low = false;
8041                         queue_thermal = true;
8042                         break;
8043                 case 231: /* thermal high to low */
8044                         DRM_DEBUG("IH: thermal high to low\n");
8045                         rdev->pm.dpm.thermal.high_to_low = true;
8046                         queue_thermal = true;
8047                         break;
8048                 case 233: /* GUI IDLE */
8049                         DRM_DEBUG("IH: GUI idle\n");
8050                         break;
8051                 case 241: /* SDMA Privileged inst */
8052                 case 247: /* SDMA Privileged inst */
8053                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8054                         /* XXX check the bitfield order! */
8055                         me_id = (ring_id & 0x3) >> 0;
8056                         queue_id = (ring_id & 0xc) >> 2;
8057                         switch (me_id) {
8058                         case 0:
8059                                 switch (queue_id) {
8060                                 case 0:
8061                                         queue_reset = true;
8062                                         break;
8063                                 case 1:
8064                                         /* XXX compute */
8065                                         queue_reset = true;
8066                                         break;
8067                                 case 2:
8068                                         /* XXX compute */
8069                                         queue_reset = true;
8070                                         break;
8071                                 }
8072                                 break;
8073                         case 1:
8074                                 switch (queue_id) {
8075                                 case 0:
8076                                         queue_reset = true;
8077                                         break;
8078                                 case 1:
8079                                         /* XXX compute */
8080                                         queue_reset = true;
8081                                         break;
8082                                 case 2:
8083                                         /* XXX compute */
8084                                         queue_reset = true;
8085                                         break;
8086                                 }
8087                                 break;
8088                         }
8089                         break;
8090                 default:
8091                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8092                         break;
8093                 }
8094
8095                 /* wptr/rptr are in bytes! */
8096                 rptr += 16;
8097                 rptr &= rdev->ih.ptr_mask;
8098                 WREG32(IH_RB_RPTR, rptr);
8099         }
8100         if (queue_dp)
8101                 schedule_work(&rdev->dp_work);
8102         if (queue_hotplug)
8103                 schedule_delayed_work(&rdev->hotplug_work, 0);
8104         if (queue_reset) {
8105                 rdev->needs_reset = true;
8106                 wake_up_all(&rdev->fence_queue);
8107         }
8108         if (queue_thermal)
8109                 schedule_work(&rdev->pm.dpm.thermal.work);
8110         rdev->ih.rptr = rptr;
8111         atomic_set(&rdev->ih.lock, 0);
8112
8113         /* make sure wptr hasn't changed while processing */
8114         wptr = cik_get_ih_wptr(rdev);
8115         if (wptr != rptr)
8116                 goto restart_ih;
8117
8118         return IRQ_HANDLED;
8119 }
8120
8121 /*
8122  * startup/shutdown callbacks
8123  */
8124 static void cik_uvd_init(struct radeon_device *rdev)
8125 {
8126         int r;
8127
8128         if (!rdev->has_uvd)
8129                 return;
8130
8131         r = radeon_uvd_init(rdev);
8132         if (r) {
8133                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8134                 /*
8135                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8136                  * to early fails cik_uvd_start() and thus nothing happens
8137                  * there. So it is pointless to try to go through that code
8138                  * hence why we disable uvd here.
8139                  */
8140                 rdev->has_uvd = 0;
8141                 return;
8142         }
8143         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8144         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8145 }
8146
8147 static void cik_uvd_start(struct radeon_device *rdev)
8148 {
8149         int r;
8150
8151         if (!rdev->has_uvd)
8152                 return;
8153
8154         r = radeon_uvd_resume(rdev);
8155         if (r) {
8156                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8157                 goto error;
8158         }
8159         r = uvd_v4_2_resume(rdev);
8160         if (r) {
8161                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8162                 goto error;
8163         }
8164         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8165         if (r) {
8166                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8167                 goto error;
8168         }
8169         return;
8170
8171 error:
8172         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8173 }
8174
8175 static void cik_uvd_resume(struct radeon_device *rdev)
8176 {
8177         struct radeon_ring *ring;
8178         int r;
8179
8180         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8181                 return;
8182
8183         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8184         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8185         if (r) {
8186                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8187                 return;
8188         }
8189         r = uvd_v1_0_init(rdev);
8190         if (r) {
8191                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8192                 return;
8193         }
8194 }
8195
8196 static void cik_vce_init(struct radeon_device *rdev)
8197 {
8198         int r;
8199
8200         if (!rdev->has_vce)
8201                 return;
8202
8203         r = radeon_vce_init(rdev);
8204         if (r) {
8205                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8206                 /*
8207                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8208                  * to early fails cik_vce_start() and thus nothing happens
8209                  * there. So it is pointless to try to go through that code
8210                  * hence why we disable vce here.
8211                  */
8212                 rdev->has_vce = 0;
8213                 return;
8214         }
8215         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8216         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8217         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8218         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8219 }
8220
8221 static void cik_vce_start(struct radeon_device *rdev)
8222 {
8223         int r;
8224
8225         if (!rdev->has_vce)
8226                 return;
8227
8228         r = radeon_vce_resume(rdev);
8229         if (r) {
8230                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8231                 goto error;
8232         }
8233         r = vce_v2_0_resume(rdev);
8234         if (r) {
8235                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8236                 goto error;
8237         }
8238         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8239         if (r) {
8240                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8241                 goto error;
8242         }
8243         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8244         if (r) {
8245                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8246                 goto error;
8247         }
8248         return;
8249
8250 error:
8251         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8252         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8253 }
8254
8255 static void cik_vce_resume(struct radeon_device *rdev)
8256 {
8257         struct radeon_ring *ring;
8258         int r;
8259
8260         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8261                 return;
8262
8263         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8264         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8265         if (r) {
8266                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8267                 return;
8268         }
8269         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8270         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8271         if (r) {
8272                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8273                 return;
8274         }
8275         r = vce_v1_0_init(rdev);
8276         if (r) {
8277                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8278                 return;
8279         }
8280 }
8281
8282 /**
8283  * cik_startup - program the asic to a functional state
8284  *
8285  * @rdev: radeon_device pointer
8286  *
8287  * Programs the asic to a functional state (CIK).
8288  * Called by cik_init() and cik_resume().
8289  * Returns 0 for success, error for failure.
8290  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPUs only: load MC firmware here unless DPM is already active */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the RLC save/restore register list for the IGP family */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start fence processing: gfx, two compute and two SDMA rings */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are non-fatal; they disable their own rings */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* Hawaii with legacy firmware uses type-2 NOPs; everything else type-3 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD/VCE ring bring-up; failures were already reported and are non-fatal */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
8483
8484 /**
8485  * cik_resume - resume the asic to a functional state
8486  *
8487  * @rdev: radeon_device pointer
8488  *
8489  * Programs the asic to a functional state (CIK).
8490  * Called at resume.
8491  * Returns 0 for success, error for failure.
8492  */
8493 int cik_resume(struct radeon_device *rdev)
8494 {
8495         int r;
8496
8497         /* post card */
8498         atom_asic_init(rdev->mode_info.atom_context);
8499
8500         /* init golden registers */
8501         cik_init_golden_registers(rdev);
8502
8503         if (rdev->pm.pm_method == PM_METHOD_DPM)
8504                 radeon_pm_resume(rdev);
8505
8506         rdev->accel_working = true;
8507         r = cik_startup(rdev);
8508         if (r) {
8509                 DRM_ERROR("cik startup failed on resume\n");
8510                 rdev->accel_working = false;
8511                 return r;
8512         }
8513
8514         return r;
8515
8516 }
8517
8518 /**
8519  * cik_suspend - suspend the asic
8520  *
8521  * @rdev: radeon_device pointer
8522  *
8523  * Bring the chip into a state suitable for suspend (CIK).
8524  * Called at suspend.
8525  * Returns 0 for success.
8526  */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the command processors and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* UVD/VCE may have been disabled at init time (see cik_uvd_init/cik_vce_init) */
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* tear down powergating/clockgating before quiescing interrupts */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8547
8548 /* Plan is to move initialization in that function and use
8549  * helper function so that radeon_device_init pretty much
8550  * do nothing more than calling asic specific function. This
8551  * should also allow to remove a bunch of callback function
8552  * like vram_info.
8553  */
8554 /**
8555  * cik_init - asic specific driver and hw init
8556  *
8557  * @rdev: radeon_device pointer
8558  *
8559  * Setup asic specific driver variables and program the hw
8560  * to a functional state (CIK).
8561  * Called at driver startup.
8562  * Returns 0 for success, errors for failure.
8563  */
8564 int cik_init(struct radeon_device *rdev)
8565 {
8566         struct radeon_ring *ring;
8567         int r;
8568
8569         /* Read BIOS */
8570         if (!radeon_get_bios(rdev)) {
8571                 if (ASIC_IS_AVIVO(rdev))
8572                         return -EINVAL;
8573         }
8574         /* Must be an ATOMBIOS */
8575         if (!rdev->is_atom_bios) {
8576                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8577                 return -EINVAL;
8578         }
8579         r = radeon_atombios_init(rdev);
8580         if (r)
8581                 return r;
8582
8583         /* Post card if necessary */
8584         if (!radeon_card_posted(rdev)) {
8585                 if (!rdev->bios) {
8586                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8587                         return -EINVAL;
8588                 }
8589                 DRM_INFO("GPU not posted. posting now...\n");
8590                 atom_asic_init(rdev->mode_info.atom_context);
8591         }
8592         /* init golden registers */
8593         cik_init_golden_registers(rdev);
8594         /* Initialize scratch registers */
8595         cik_scratch_init(rdev);
8596         /* Initialize surface registers */
8597         radeon_surface_init(rdev);
8598         /* Initialize clocks */
8599         radeon_get_clock_info(rdev->ddev);
8600
8601         /* Fence driver */
8602         r = radeon_fence_driver_init(rdev);
8603         if (r)
8604                 return r;
8605
8606         /* initialize memory controller */
8607         r = cik_mc_init(rdev);
8608         if (r)
8609                 return r;
8610         /* Memory manager */
8611         r = radeon_bo_init(rdev);
8612         if (r)
8613                 return r;
8614
8615         if (rdev->flags & RADEON_IS_IGP) {
8616                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8617                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8618                         r = cik_init_microcode(rdev);
8619                         if (r) {
8620                                 DRM_ERROR("Failed to load firmware!\n");
8621                                 return r;
8622                         }
8623                 }
8624         } else {
8625                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8626                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8627                     !rdev->mc_fw) {
8628                         r = cik_init_microcode(rdev);
8629                         if (r) {
8630                                 DRM_ERROR("Failed to load firmware!\n");
8631                                 return r;
8632                         }
8633                 }
8634         }
8635
8636         /* Initialize power management */
8637         radeon_pm_init(rdev);
8638
8639         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8640         ring->ring_obj = NULL;
8641         r600_ring_init(rdev, ring, 1024 * 1024);
8642
8643         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8644         ring->ring_obj = NULL;
8645         r600_ring_init(rdev, ring, 1024 * 1024);
8646         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8647         if (r)
8648                 return r;
8649
8650         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8651         ring->ring_obj = NULL;
8652         r600_ring_init(rdev, ring, 1024 * 1024);
8653         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8654         if (r)
8655                 return r;
8656
8657         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8658         ring->ring_obj = NULL;
8659         r600_ring_init(rdev, ring, 256 * 1024);
8660
8661         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8662         ring->ring_obj = NULL;
8663         r600_ring_init(rdev, ring, 256 * 1024);
8664
8665         cik_uvd_init(rdev);
8666         cik_vce_init(rdev);
8667
8668         rdev->ih.ring_obj = NULL;
8669         r600_ih_ring_init(rdev, 64 * 1024);
8670
8671         r = r600_pcie_gart_init(rdev);
8672         if (r)
8673                 return r;
8674
8675         rdev->accel_working = true;
8676         r = cik_startup(rdev);
8677         if (r) {
8678                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8679                 cik_cp_fini(rdev);
8680                 cik_sdma_fini(rdev);
8681                 cik_irq_fini(rdev);
8682                 sumo_rlc_fini(rdev);
8683                 cik_mec_fini(rdev);
8684                 radeon_wb_fini(rdev);
8685                 radeon_ib_pool_fini(rdev);
8686                 radeon_vm_manager_fini(rdev);
8687                 radeon_irq_kms_fini(rdev);
8688                 cik_pcie_gart_fini(rdev);
8689                 rdev->accel_working = false;
8690         }
8691
8692         /* Don't start up if the MC ucode is missing.
8693          * The default clocks and voltages before the MC ucode
8694          * is loaded are not suffient for advanced operations.
8695          */
8696         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8697                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8698                 return -EINVAL;
8699         }
8700
8701         return 0;
8702 }
8703
8704 /**
8705  * cik_fini - asic specific driver and hw fini
8706  *
8707  * @rdev: radeon_device pointer
8708  *
8709  * Tear down the asic specific driver variables and program the hw
8710  * to an idle state (CIK).
8711  * Called at driver unload.
8712  */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* stop the command processors, SDMA, and gating */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	/* free asic-specific buffers */
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* shut down the media blocks and the GART */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	/* core driver teardown */
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8739
8740 void dce8_program_fmt(struct drm_encoder *encoder)
8741 {
8742         struct drm_device *dev = encoder->dev;
8743         struct radeon_device *rdev = dev->dev_private;
8744         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8745         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8746         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8747         int bpc = 0;
8748         u32 tmp = 0;
8749         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8750
8751         if (connector) {
8752                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8753                 bpc = radeon_get_monitor_bpc(connector);
8754                 dither = radeon_connector->dither;
8755         }
8756
8757         /* LVDS/eDP FMT is set up by atom */
8758         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8759                 return;
8760
8761         /* not needed for analog */
8762         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8763             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8764                 return;
8765
8766         if (bpc == 0)
8767                 return;
8768
8769         switch (bpc) {
8770         case 6:
8771                 if (dither == RADEON_FMT_DITHER_ENABLE)
8772                         /* XXX sort out optimal dither settings */
8773                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8774                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8775                 else
8776                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8777                 break;
8778         case 8:
8779                 if (dither == RADEON_FMT_DITHER_ENABLE)
8780                         /* XXX sort out optimal dither settings */
8781                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8782                                 FMT_RGB_RANDOM_ENABLE |
8783                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8784                 else
8785                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8786                 break;
8787         case 10:
8788                 if (dither == RADEON_FMT_DITHER_ENABLE)
8789                         /* XXX sort out optimal dither settings */
8790                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8791                                 FMT_RGB_RANDOM_ENABLE |
8792                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8793                 else
8794                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8795                 break;
8796         default:
8797                 /* not needed */
8798                 break;
8799         }
8800
8801         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8802 }
8803
8804 /* display watermark setup */
8805 /**
8806  * dce8_line_buffer_adjust - Set up the line buffer
8807  *
8808  * @rdev: radeon_device pointer
8809  * @radeon_crtc: the selected display controller
8810  * @mode: the current display mode on the selected display
8811  * controller
8812  *
8813  * Setup up the line buffer allocation for
8814  * the selected display controller (CIK).
8815  * Returns the line buffer size in pixels.
8816  */
8817 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8818                                    struct radeon_crtc *radeon_crtc,
8819                                    struct drm_display_mode *mode)
8820 {
8821         u32 tmp, buffer_alloc, i;
8822         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8823         /*
8824          * Line Buffer Setup
8825          * There are 6 line buffers, one for each display controllers.
8826          * There are 3 partitions per LB. Select the number of partitions
8827          * to enable based on the display width.  For display widths larger
8828          * than 4096, you need use to use 2 display controllers and combine
8829          * them using the stereo blender.
8830          */
8831         if (radeon_crtc->base.enabled && mode) {
8832                 if (mode->crtc_hdisplay < 1920) {
8833                         tmp = 1;
8834                         buffer_alloc = 2;
8835                 } else if (mode->crtc_hdisplay < 2560) {
8836                         tmp = 2;
8837                         buffer_alloc = 2;
8838                 } else if (mode->crtc_hdisplay < 4096) {
8839                         tmp = 0;
8840                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8841                 } else {
8842                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8843                         tmp = 0;
8844                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8845                 }
8846         } else {
8847                 tmp = 1;
8848                 buffer_alloc = 0;
8849         }
8850
8851         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8852                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8853
8854         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8855                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8856         for (i = 0; i < rdev->usec_timeout; i++) {
8857                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8858                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8859                         break;
8860                 udelay(1);
8861         }
8862
8863         if (radeon_crtc->base.enabled && mode) {
8864                 switch (tmp) {
8865                 case 0:
8866                 default:
8867                         return 4096 * 2;
8868                 case 1:
8869                         return 1920 * 2;
8870                 case 2:
8871                         return 2560 * 2;
8872                 }
8873         }
8874
8875         /* controller not enabled, so no lb used */
8876         return 0;
8877 }
8878
8879 /**
8880  * cik_get_number_of_dram_channels - get the number of dram channels
8881  *
8882  * @rdev: radeon_device pointer
8883  *
8884  * Look up the number of video ram channels (CIK).
8885  * Used for display watermark bandwidth calculations
8886  * Returns the number of dram channels
8887  */
8888 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8889 {
8890         u32 tmp = RREG32(MC_SHARED_CHMAP);
8891
8892         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8893         case 0:
8894         default:
8895                 return 1;
8896         case 1:
8897                 return 2;
8898         case 2:
8899                 return 4;
8900         case 3:
8901                 return 8;
8902         case 4:
8903                 return 3;
8904         case 5:
8905                 return 6;
8906         case 6:
8907                 return 10;
8908         case 7:
8909                 return 12;
8910         case 8:
8911                 return 16;
8912         }
8913 }
8914
/* Per-head inputs gathered for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
        u32 dram_channels; /* number of dram channels */
        u32 yclk;          /* bandwidth per dram data pin in kHz */
        u32 sclk;          /* engine clock in kHz */
        u32 disp_clk;      /* display clock in kHz */
        u32 src_width;     /* viewport width */
        u32 active_time;   /* active display time in ns */
        u32 blank_time;    /* blank time in ns */
        bool interlaced;    /* mode is interlaced */
        fixed20_12 vsc;    /* vertical scale ratio */
        u32 num_heads;     /* number of active crtcs */
        u32 bytes_per_pixel; /* bytes per pixel display + overlay */
        u32 lb_size;       /* line buffer allocated to pipe */
        u32 vtaps;         /* vertical scaler taps */
};
8930
8931 /**
8932  * dce8_dram_bandwidth - get the dram bandwidth
8933  *
8934  * @wm: watermark calculation data
8935  *
8936  * Calculate the raw dram bandwidth (CIK).
8937  * Used for display watermark bandwidth calculations
8938  * Returns the dram bandwidth in MBytes/s
8939  */
8940 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8941 {
8942         /* Calculate raw DRAM Bandwidth */
8943         fixed20_12 dram_efficiency; /* 0.7 */
8944         fixed20_12 yclk, dram_channels, bandwidth;
8945         fixed20_12 a;
8946
8947         a.full = dfixed_const(1000);
8948         yclk.full = dfixed_const(wm->yclk);
8949         yclk.full = dfixed_div(yclk, a);
8950         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8951         a.full = dfixed_const(10);
8952         dram_efficiency.full = dfixed_const(7);
8953         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8954         bandwidth.full = dfixed_mul(dram_channels, yclk);
8955         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8956
8957         return dfixed_trunc(bandwidth);
8958 }
8959
8960 /**
8961  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8962  *
8963  * @wm: watermark calculation data
8964  *
8965  * Calculate the dram bandwidth used for display (CIK).
8966  * Used for display watermark bandwidth calculations
8967  * Returns the dram bandwidth for display in MBytes/s
8968  */
8969 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8970 {
8971         /* Calculate DRAM Bandwidth and the part allocated to display. */
8972         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8973         fixed20_12 yclk, dram_channels, bandwidth;
8974         fixed20_12 a;
8975
8976         a.full = dfixed_const(1000);
8977         yclk.full = dfixed_const(wm->yclk);
8978         yclk.full = dfixed_div(yclk, a);
8979         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8980         a.full = dfixed_const(10);
8981         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8982         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8983         bandwidth.full = dfixed_mul(dram_channels, yclk);
8984         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8985
8986         return dfixed_trunc(bandwidth);
8987 }
8988
8989 /**
8990  * dce8_data_return_bandwidth - get the data return bandwidth
8991  *
8992  * @wm: watermark calculation data
8993  *
8994  * Calculate the data return bandwidth used for display (CIK).
8995  * Used for display watermark bandwidth calculations
8996  * Returns the data return bandwidth in MBytes/s
8997  */
8998 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8999 {
9000         /* Calculate the display Data return Bandwidth */
9001         fixed20_12 return_efficiency; /* 0.8 */
9002         fixed20_12 sclk, bandwidth;
9003         fixed20_12 a;
9004
9005         a.full = dfixed_const(1000);
9006         sclk.full = dfixed_const(wm->sclk);
9007         sclk.full = dfixed_div(sclk, a);
9008         a.full = dfixed_const(10);
9009         return_efficiency.full = dfixed_const(8);
9010         return_efficiency.full = dfixed_div(return_efficiency, a);
9011         a.full = dfixed_const(32);
9012         bandwidth.full = dfixed_mul(a, sclk);
9013         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9014
9015         return dfixed_trunc(bandwidth);
9016 }
9017
9018 /**
9019  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9020  *
9021  * @wm: watermark calculation data
9022  *
9023  * Calculate the dmif bandwidth used for display (CIK).
9024  * Used for display watermark bandwidth calculations
9025  * Returns the dmif bandwidth in MBytes/s
9026  */
9027 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9028 {
9029         /* Calculate the DMIF Request Bandwidth */
9030         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9031         fixed20_12 disp_clk, bandwidth;
9032         fixed20_12 a, b;
9033
9034         a.full = dfixed_const(1000);
9035         disp_clk.full = dfixed_const(wm->disp_clk);
9036         disp_clk.full = dfixed_div(disp_clk, a);
9037         a.full = dfixed_const(32);
9038         b.full = dfixed_mul(a, disp_clk);
9039
9040         a.full = dfixed_const(10);
9041         disp_clk_request_efficiency.full = dfixed_const(8);
9042         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9043
9044         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9045
9046         return dfixed_trunc(bandwidth);
9047 }
9048
9049 /**
9050  * dce8_available_bandwidth - get the min available bandwidth
9051  *
9052  * @wm: watermark calculation data
9053  *
9054  * Calculate the min available bandwidth used for display (CIK).
9055  * Used for display watermark bandwidth calculations
9056  * Returns the min available bandwidth in MBytes/s
9057  */
9058 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9059 {
9060         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9061         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9062         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9063         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9064
9065         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9066 }
9067
9068 /**
9069  * dce8_average_bandwidth - get the average available bandwidth
9070  *
9071  * @wm: watermark calculation data
9072  *
9073  * Calculate the average available bandwidth used for display (CIK).
9074  * Used for display watermark bandwidth calculations
9075  * Returns the average available bandwidth in MBytes/s
9076  */
9077 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9078 {
9079         /* Calculate the display mode Average Bandwidth
9080          * DisplayMode should contain the source and destination dimensions,
9081          * timing, etc.
9082          */
9083         fixed20_12 bpp;
9084         fixed20_12 line_time;
9085         fixed20_12 src_width;
9086         fixed20_12 bandwidth;
9087         fixed20_12 a;
9088
9089         a.full = dfixed_const(1000);
9090         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9091         line_time.full = dfixed_div(line_time, a);
9092         bpp.full = dfixed_const(wm->bytes_per_pixel);
9093         src_width.full = dfixed_const(wm->src_width);
9094         bandwidth.full = dfixed_mul(src_width, bpp);
9095         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9096         bandwidth.full = dfixed_div(bandwidth, line_time);
9097
9098         return dfixed_trunc(bandwidth);
9099 }
9100
9101 /**
9102  * dce8_latency_watermark - get the latency watermark
9103  *
9104  * @wm: watermark calculation data
9105  *
9106  * Calculate the latency watermark (CIK).
9107  * Used for display watermark bandwidth calculations
9108  * Returns the latency watermark in ns
9109  */
9110 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9111 {
9112         /* First calculate the latency in ns */
9113         u32 mc_latency = 2000; /* 2000 ns. */
9114         u32 available_bandwidth = dce8_available_bandwidth(wm);
9115         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9116         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9117         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9118         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9119                 (wm->num_heads * cursor_line_pair_return_time);
9120         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9121         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9122         u32 tmp, dmif_size = 12288;
9123         fixed20_12 a, b, c;
9124
9125         if (wm->num_heads == 0)
9126                 return 0;
9127
9128         a.full = dfixed_const(2);
9129         b.full = dfixed_const(1);
9130         if ((wm->vsc.full > a.full) ||
9131             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9132             (wm->vtaps >= 5) ||
9133             ((wm->vsc.full >= a.full) && wm->interlaced))
9134                 max_src_lines_per_dst_line = 4;
9135         else
9136                 max_src_lines_per_dst_line = 2;
9137
9138         a.full = dfixed_const(available_bandwidth);
9139         b.full = dfixed_const(wm->num_heads);
9140         a.full = dfixed_div(a, b);
9141         tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9142         tmp = min(dfixed_trunc(a), tmp);
9143
9144         lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9145
9146         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9147         b.full = dfixed_const(1000);
9148         c.full = dfixed_const(lb_fill_bw);
9149         b.full = dfixed_div(c, b);
9150         a.full = dfixed_div(a, b);
9151         line_fill_time = dfixed_trunc(a);
9152
9153         if (line_fill_time < wm->active_time)
9154                 return latency;
9155         else
9156                 return latency + (line_fill_time - wm->active_time);
9157
9158 }
9159
9160 /**
9161  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9162  * average and available dram bandwidth
9163  *
9164  * @wm: watermark calculation data
9165  *
9166  * Check if the display average bandwidth fits in the display
9167  * dram bandwidth (CIK).
9168  * Used for display watermark bandwidth calculations
9169  * Returns true if the display fits, false if not.
9170  */
9171 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9172 {
9173         if (dce8_average_bandwidth(wm) <=
9174             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9175                 return true;
9176         else
9177                 return false;
9178 }
9179
9180 /**
9181  * dce8_average_bandwidth_vs_available_bandwidth - check
9182  * average and available bandwidth
9183  *
9184  * @wm: watermark calculation data
9185  *
9186  * Check if the display average bandwidth fits in the display
9187  * available bandwidth (CIK).
9188  * Used for display watermark bandwidth calculations
9189  * Returns true if the display fits, false if not.
9190  */
9191 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9192 {
9193         if (dce8_average_bandwidth(wm) <=
9194             (dce8_available_bandwidth(wm) / wm->num_heads))
9195                 return true;
9196         else
9197                 return false;
9198 }
9199
9200 /**
9201  * dce8_check_latency_hiding - check latency hiding
9202  *
9203  * @wm: watermark calculation data
9204  *
9205  * Check latency hiding (CIK).
9206  * Used for display watermark bandwidth calculations
9207  * Returns true if the display fits, false if not.
9208  */
9209 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9210 {
9211         u32 lb_partitions = wm->lb_size / wm->src_width;
9212         u32 line_time = wm->active_time + wm->blank_time;
9213         u32 latency_tolerant_lines;
9214         u32 latency_hiding;
9215         fixed20_12 a;
9216
9217         a.full = dfixed_const(1);
9218         if (wm->vsc.full > a.full)
9219                 latency_tolerant_lines = 1;
9220         else {
9221                 if (lb_partitions <= (wm->vtaps + 1))
9222                         latency_tolerant_lines = 1;
9223                 else
9224                         latency_tolerant_lines = 2;
9225         }
9226
9227         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9228
9229         if (dce8_latency_watermark(wm) <= latency_hiding)
9230                 return true;
9231         else
9232                 return false;
9233 }
9234
9235 /**
9236  * dce8_program_watermarks - program display watermarks
9237  *
9238  * @rdev: radeon_device pointer
9239  * @radeon_crtc: the selected display controller
9240  * @lb_size: line buffer size
9241  * @num_heads: number of display controllers in use
9242  *
9243  * Calculate and program the display watermarks for the
9244  * selected display controller (CIK).
9245  */
9246 static void dce8_program_watermarks(struct radeon_device *rdev,
9247                                     struct radeon_crtc *radeon_crtc,
9248                                     u32 lb_size, u32 num_heads)
9249 {
9250         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9251         struct dce8_wm_params wm_low, wm_high;
9252         u32 active_time;
9253         u32 line_time = 0;
9254         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9255         u32 tmp, wm_mask;
9256
9257         if (radeon_crtc->base.enabled && num_heads && mode) {
9258                 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9259                                             (u32)mode->clock);
9260                 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9261                                           (u32)mode->clock);
9262                 line_time = min(line_time, (u32)65535);
9263
9264                 /* watermark for high clocks */
9265                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9266                     rdev->pm.dpm_enabled) {
9267                         wm_high.yclk =
9268                                 radeon_dpm_get_mclk(rdev, false) * 10;
9269                         wm_high.sclk =
9270                                 radeon_dpm_get_sclk(rdev, false) * 10;
9271                 } else {
9272                         wm_high.yclk = rdev->pm.current_mclk * 10;
9273                         wm_high.sclk = rdev->pm.current_sclk * 10;
9274                 }
9275
9276                 wm_high.disp_clk = mode->clock;
9277                 wm_high.src_width = mode->crtc_hdisplay;
9278                 wm_high.active_time = active_time;
9279                 wm_high.blank_time = line_time - wm_high.active_time;
9280                 wm_high.interlaced = false;
9281                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9282                         wm_high.interlaced = true;
9283                 wm_high.vsc = radeon_crtc->vsc;
9284                 wm_high.vtaps = 1;
9285                 if (radeon_crtc->rmx_type != RMX_OFF)
9286                         wm_high.vtaps = 2;
9287                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9288                 wm_high.lb_size = lb_size;
9289                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9290                 wm_high.num_heads = num_heads;
9291
9292                 /* set for high clocks */
9293                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9294
9295                 /* possibly force display priority to high */
9296                 /* should really do this at mode validation time... */
9297                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9298                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9299                     !dce8_check_latency_hiding(&wm_high) ||
9300                     (rdev->disp_priority == 2)) {
9301                         DRM_DEBUG_KMS("force priority to high\n");
9302                 }
9303
9304                 /* watermark for low clocks */
9305                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9306                     rdev->pm.dpm_enabled) {
9307                         wm_low.yclk =
9308                                 radeon_dpm_get_mclk(rdev, true) * 10;
9309                         wm_low.sclk =
9310                                 radeon_dpm_get_sclk(rdev, true) * 10;
9311                 } else {
9312                         wm_low.yclk = rdev->pm.current_mclk * 10;
9313                         wm_low.sclk = rdev->pm.current_sclk * 10;
9314                 }
9315
9316                 wm_low.disp_clk = mode->clock;
9317                 wm_low.src_width = mode->crtc_hdisplay;
9318                 wm_low.active_time = active_time;
9319                 wm_low.blank_time = line_time - wm_low.active_time;
9320                 wm_low.interlaced = false;
9321                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9322                         wm_low.interlaced = true;
9323                 wm_low.vsc = radeon_crtc->vsc;
9324                 wm_low.vtaps = 1;
9325                 if (radeon_crtc->rmx_type != RMX_OFF)
9326                         wm_low.vtaps = 2;
9327                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9328                 wm_low.lb_size = lb_size;
9329                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9330                 wm_low.num_heads = num_heads;
9331
9332                 /* set for low clocks */
9333                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9334
9335                 /* possibly force display priority to high */
9336                 /* should really do this at mode validation time... */
9337                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9338                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9339                     !dce8_check_latency_hiding(&wm_low) ||
9340                     (rdev->disp_priority == 2)) {
9341                         DRM_DEBUG_KMS("force priority to high\n");
9342                 }
9343
9344                 /* Save number of lines the linebuffer leads before the scanout */
9345                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9346         }
9347
9348         /* select wm A */
9349         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9350         tmp = wm_mask;
9351         tmp &= ~LATENCY_WATERMARK_MASK(3);
9352         tmp |= LATENCY_WATERMARK_MASK(1);
9353         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9354         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9355                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9356                 LATENCY_HIGH_WATERMARK(line_time)));
9357         /* select wm B */
9358         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9359         tmp &= ~LATENCY_WATERMARK_MASK(3);
9360         tmp |= LATENCY_WATERMARK_MASK(2);
9361         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9362         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9363                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9364                 LATENCY_HIGH_WATERMARK(line_time)));
9365         /* restore original selection */
9366         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9367
9368         /* save values for DPM */
9369         radeon_crtc->line_time = line_time;
9370         radeon_crtc->wm_high = latency_watermark_a;
9371         radeon_crtc->wm_low = latency_watermark_b;
9372 }
9373
9374 /**
9375  * dce8_bandwidth_update - program display watermarks
9376  *
9377  * @rdev: radeon_device pointer
9378  *
9379  * Calculate and program the display watermarks and line
9380  * buffer allocation (CIK).
9381  */
9382 void dce8_bandwidth_update(struct radeon_device *rdev)
9383 {
9384         struct drm_display_mode *mode = NULL;
9385         u32 num_heads = 0, lb_size;
9386         int i;
9387
9388         if (!rdev->mode_info.mode_config_initialized)
9389                 return;
9390
9391         radeon_update_display_priority(rdev);
9392
9393         for (i = 0; i < rdev->num_crtc; i++) {
9394                 if (rdev->mode_info.crtcs[i]->base.enabled)
9395                         num_heads++;
9396         }
9397         for (i = 0; i < rdev->num_crtc; i++) {
9398                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9399                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9400                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9401         }
9402 }
9403
9404 /**
9405  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9406  *
9407  * @rdev: radeon_device pointer
9408  *
9409  * Fetches a GPU clock counter snapshot (SI).
9410  * Returns the 64 bit clock counter snapshot.
9411  */
9412 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9413 {
9414         uint64_t clock;
9415
9416         mutex_lock(&rdev->gpu_clock_mutex);
9417         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9418         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9419                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9420         mutex_unlock(&rdev->gpu_clock_mutex);
9421         return clock;
9422 }
9423
9424 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9425                              u32 cntl_reg, u32 status_reg)
9426 {
9427         int r, i;
9428         struct atom_clock_dividers dividers;
9429         uint32_t tmp;
9430
9431         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9432                                            clock, false, &dividers);
9433         if (r)
9434                 return r;
9435
9436         tmp = RREG32_SMC(cntl_reg);
9437         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9438         tmp |= dividers.post_divider;
9439         WREG32_SMC(cntl_reg, tmp);
9440
9441         for (i = 0; i < 100; i++) {
9442                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9443                         break;
9444                 mdelay(10);
9445         }
9446         if (i == 100)
9447                 return -ETIMEDOUT;
9448
9449         return 0;
9450 }
9451
9452 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9453 {
9454         int r = 0;
9455
9456         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9457         if (r)
9458                 return r;
9459
9460         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9461         return r;
9462 }
9463
9464 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9465 {
9466         int r, i;
9467         struct atom_clock_dividers dividers;
9468         u32 tmp;
9469
9470         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9471                                            ecclk, false, &dividers);
9472         if (r)
9473                 return r;
9474
9475         for (i = 0; i < 100; i++) {
9476                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9477                         break;
9478                 mdelay(10);
9479         }
9480         if (i == 100)
9481                 return -ETIMEDOUT;
9482
9483         tmp = RREG32_SMC(CG_ECLK_CNTL);
9484         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9485         tmp |= dividers.post_divider;
9486         WREG32_SMC(CG_ECLK_CNTL, tmp);
9487
9488         for (i = 0; i < 100; i++) {
9489                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9490                         break;
9491                 mdelay(10);
9492         }
9493         if (i == 100)
9494                 return -ETIMEDOUT;
9495
9496         return 0;
9497 }
9498
9499 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9500 {
9501         struct pci_dev *root = rdev->pdev->bus->self;
9502         enum pci_bus_speed speed_cap;
9503         u32 speed_cntl, current_data_rate;
9504         int i;
9505         u16 tmp16;
9506
9507         if (pci_is_root_bus(rdev->pdev->bus))
9508                 return;
9509
9510         if (radeon_pcie_gen2 == 0)
9511                 return;
9512
9513         if (rdev->flags & RADEON_IS_IGP)
9514                 return;
9515
9516         if (!(rdev->flags & RADEON_IS_PCIE))
9517                 return;
9518
9519         speed_cap = pcie_get_speed_cap(root);
9520         if (speed_cap == PCI_SPEED_UNKNOWN)
9521                 return;
9522
9523         if ((speed_cap != PCIE_SPEED_8_0GT) &&
9524             (speed_cap != PCIE_SPEED_5_0GT))
9525                 return;
9526
9527         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9528         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9529                 LC_CURRENT_DATA_RATE_SHIFT;
9530         if (speed_cap == PCIE_SPEED_8_0GT) {
9531                 if (current_data_rate == 2) {
9532                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9533                         return;
9534                 }
9535                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9536         } else if (speed_cap == PCIE_SPEED_5_0GT) {
9537                 if (current_data_rate == 1) {
9538                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9539                         return;
9540                 }
9541                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9542         }
9543
9544         if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
9545                 return;
9546
9547         if (speed_cap == PCIE_SPEED_8_0GT) {
9548                 /* re-try equalization if gen3 is not already enabled */
9549                 if (current_data_rate != 2) {
9550                         u16 bridge_cfg, gpu_cfg;
9551                         u16 bridge_cfg2, gpu_cfg2;
9552                         u32 max_lw, current_lw, tmp;
9553
9554                         pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9555                                                   &bridge_cfg);
9556                         pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
9557                                                   &gpu_cfg);
9558
9559                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9560                         pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
9561
9562                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9563                         pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
9564                                                    tmp16);
9565
9566                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9567                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9568                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9569
9570                         if (current_lw < max_lw) {
9571                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9572                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9573                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9574                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9575                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9576                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9577                                 }
9578                         }
9579
9580                         for (i = 0; i < 10; i++) {
9581                                 /* check status */
9582                                 pcie_capability_read_word(rdev->pdev,
9583                                                           PCI_EXP_DEVSTA,
9584                                                           &tmp16);
9585                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9586                                         break;
9587
9588                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9589                                                           &bridge_cfg);
9590                                 pcie_capability_read_word(rdev->pdev,
9591                                                           PCI_EXP_LNKCTL,
9592                                                           &gpu_cfg);
9593
9594                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9595                                                           &bridge_cfg2);
9596                                 pcie_capability_read_word(rdev->pdev,
9597                                                           PCI_EXP_LNKCTL2,
9598                                                           &gpu_cfg2);
9599
9600                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9601                                 tmp |= LC_SET_QUIESCE;
9602                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9603
9604                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9605                                 tmp |= LC_REDO_EQ;
9606                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9607
9608                                 msleep(100);
9609
9610                                 /* linkctl */
9611                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9612                                                           &tmp16);
9613                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9614                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9615                                 pcie_capability_write_word(root, PCI_EXP_LNKCTL,
9616                                                            tmp16);
9617
9618                                 pcie_capability_read_word(rdev->pdev,
9619                                                           PCI_EXP_LNKCTL,
9620                                                           &tmp16);
9621                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9622                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9623                                 pcie_capability_write_word(rdev->pdev,
9624                                                            PCI_EXP_LNKCTL,
9625                                                            tmp16);
9626
9627                                 /* linkctl2 */
9628                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9629                                                           &tmp16);
9630                                 tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9631                                            PCI_EXP_LNKCTL2_TX_MARGIN);
9632                                 tmp16 |= (bridge_cfg2 &
9633                                           (PCI_EXP_LNKCTL2_ENTER_COMP |
9634                                            PCI_EXP_LNKCTL2_TX_MARGIN));
9635                                 pcie_capability_write_word(root,
9636                                                            PCI_EXP_LNKCTL2,
9637                                                            tmp16);
9638
9639                                 pcie_capability_read_word(rdev->pdev,
9640                                                           PCI_EXP_LNKCTL2,
9641                                                           &tmp16);
9642                                 tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9643                                            PCI_EXP_LNKCTL2_TX_MARGIN);
9644                                 tmp16 |= (gpu_cfg2 &
9645                                           (PCI_EXP_LNKCTL2_ENTER_COMP |
9646                                            PCI_EXP_LNKCTL2_TX_MARGIN));
9647                                 pcie_capability_write_word(rdev->pdev,
9648                                                            PCI_EXP_LNKCTL2,
9649                                                            tmp16);
9650
9651                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9652                                 tmp &= ~LC_SET_QUIESCE;
9653                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9654                         }
9655                 }
9656         }
9657
9658         /* set the link speed */
9659         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9660         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9661         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9662
9663         pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
9664         tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
9665         if (speed_cap == PCIE_SPEED_8_0GT)
9666                 tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
9667         else if (speed_cap == PCIE_SPEED_5_0GT)
9668                 tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
9669         else
9670                 tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
9671         pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
9672
9673         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9674         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9675         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9676
9677         for (i = 0; i < rdev->usec_timeout; i++) {
9678                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9679                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9680                         break;
9681                 udelay(1);
9682         }
9683 }
9684
9685 static void cik_program_aspm(struct radeon_device *rdev)
9686 {
9687         u32 data, orig;
9688         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9689         bool disable_clkreq = false;
9690
9691         if (radeon_aspm == 0)
9692                 return;
9693
9694         /* XXX double check IGPs */
9695         if (rdev->flags & RADEON_IS_IGP)
9696                 return;
9697
9698         if (!(rdev->flags & RADEON_IS_PCIE))
9699                 return;
9700
9701         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9702         data &= ~LC_XMIT_N_FTS_MASK;
9703         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9704         if (orig != data)
9705                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9706
9707         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9708         data |= LC_GO_TO_RECOVERY;
9709         if (orig != data)
9710                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9711
9712         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9713         data |= P_IGNORE_EDB_ERR;
9714         if (orig != data)
9715                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9716
9717         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9718         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9719         data |= LC_PMI_TO_L1_DIS;
9720         if (!disable_l0s)
9721                 data |= LC_L0S_INACTIVITY(7);
9722
9723         if (!disable_l1) {
9724                 data |= LC_L1_INACTIVITY(7);
9725                 data &= ~LC_PMI_TO_L1_DIS;
9726                 if (orig != data)
9727                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9728
9729                 if (!disable_plloff_in_l1) {
9730                         bool clk_req_support;
9731
9732                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9733                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9734                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9735                         if (orig != data)
9736                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9737
9738                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9739                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9740                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9741                         if (orig != data)
9742                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9743
9744                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9745                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9746                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9747                         if (orig != data)
9748                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9749
9750                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9751                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9752                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9753                         if (orig != data)
9754                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9755
9756                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9757                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9758                         data |= LC_DYN_LANES_PWR_STATE(3);
9759                         if (orig != data)
9760                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9761
9762                         if (!disable_clkreq &&
9763                             !pci_is_root_bus(rdev->pdev->bus)) {
9764                                 struct pci_dev *root = rdev->pdev->bus->self;
9765                                 u32 lnkcap;
9766
9767                                 clk_req_support = false;
9768                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9769                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9770                                         clk_req_support = true;
9771                         } else {
9772                                 clk_req_support = false;
9773                         }
9774
9775                         if (clk_req_support) {
9776                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9777                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9778                                 if (orig != data)
9779                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9780
9781                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9782                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9783                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9784                                 if (orig != data)
9785                                         WREG32_SMC(THM_CLK_CNTL, data);
9786
9787                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9788                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9789                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9790                                 if (orig != data)
9791                                         WREG32_SMC(MISC_CLK_CTRL, data);
9792
9793                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9794                                 data &= ~BCLK_AS_XCLK;
9795                                 if (orig != data)
9796                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9797
9798                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9799                                 data &= ~FORCE_BIF_REFCLK_EN;
9800                                 if (orig != data)
9801                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9802
9803                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9804                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9805                                 data |= MPLL_CLKOUT_SEL(4);
9806                                 if (orig != data)
9807                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9808                         }
9809                 }
9810         } else {
9811                 if (orig != data)
9812                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9813         }
9814
9815         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9816         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9817         if (orig != data)
9818                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9819
9820         if (!disable_l0s) {
9821                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9822                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9823                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9824                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9825                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9826                                 data &= ~LC_L0S_INACTIVITY_MASK;
9827                                 if (orig != data)
9828                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9829                         }
9830                 }
9831         }
9832 }