Merge tag 'block-5.15-2021-10-29' of git://git.kernel.dk/linux-block
[platform/kernel/linux-rpi.git] / drivers / gpu / drm / radeon / si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include <linux/slab.h>
29
30 #include <drm/drm_vblank.h>
31 #include <drm/radeon_drm.h>
32
33 #include "atom.h"
34 #include "clearstate_si.h"
35 #include "evergreen.h"
36 #include "r600.h"
37 #include "radeon.h"
38 #include "radeon_asic.h"
39 #include "radeon_audio.h"
40 #include "radeon_ucode.h"
41 #include "si_blit_shaders.h"
42 #include "si.h"
43 #include "sid.h"
44
45
/*
 * Firmware images advertised by this module through MODULE_FIRMWARE().
 *
 * For each of TAHITI, PITCAIRN, VERDE, OLAND and HAINAN there is an
 * upper-case set (pfp, me, ce, mc, mc2, rlc, smc) and a lower-case set
 * (pfp, me, ce, mc, rlc, smc).  Every lower-case set except tahiti's also
 * lists a "<chip>_k_smc.bin" image.  banks_k_2_smc.bin and si58_mc.bin are
 * listed without a chip-name prefix.
 *
 * NOTE(review): presumably the two spellings correspond to two generations
 * of firmware packaging and the "_k_" images to specific board variants --
 * confirm against the microcode loading code, which is not visible here.
 */
46 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
48 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
49 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
50 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
51 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
52 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
53
54 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
55 MODULE_FIRMWARE("radeon/tahiti_me.bin");
56 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
57 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
58 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
59 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
60
61 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
62 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
63 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
64 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
65 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
66 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
67 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
68
69 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
70 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
71 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
72 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
73 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
74 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
75 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
76
77 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
78 MODULE_FIRMWARE("radeon/VERDE_me.bin");
79 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
80 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
81 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
82 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
83 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
84
85 MODULE_FIRMWARE("radeon/verde_pfp.bin");
86 MODULE_FIRMWARE("radeon/verde_me.bin");
87 MODULE_FIRMWARE("radeon/verde_ce.bin");
88 MODULE_FIRMWARE("radeon/verde_mc.bin");
89 MODULE_FIRMWARE("radeon/verde_rlc.bin");
90 MODULE_FIRMWARE("radeon/verde_smc.bin");
91 MODULE_FIRMWARE("radeon/verde_k_smc.bin");
92
93 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
94 MODULE_FIRMWARE("radeon/OLAND_me.bin");
95 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
96 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
97 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
98 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
99 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
100
101 MODULE_FIRMWARE("radeon/oland_pfp.bin");
102 MODULE_FIRMWARE("radeon/oland_me.bin");
103 MODULE_FIRMWARE("radeon/oland_ce.bin");
104 MODULE_FIRMWARE("radeon/oland_mc.bin");
105 MODULE_FIRMWARE("radeon/oland_rlc.bin");
106 MODULE_FIRMWARE("radeon/oland_smc.bin");
107 MODULE_FIRMWARE("radeon/oland_k_smc.bin");
108
109 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
110 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
111 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
112 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
113 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
114 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
115 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
116
117 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
118 MODULE_FIRMWARE("radeon/hainan_me.bin");
119 MODULE_FIRMWARE("radeon/hainan_ce.bin");
120 MODULE_FIRMWARE("radeon/hainan_mc.bin");
121 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
122 MODULE_FIRMWARE("radeon/hainan_smc.bin");
123 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
124 MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
125
126 MODULE_FIRMWARE("radeon/si58_mc.bin");
127
/*
 * Forward declarations for helpers that are referenced before their
 * definitions.  The static ones are local to this file.
 *
 * NOTE(review): sumo_rlc_fini() and sumo_rlc_init() are declared extern
 * here rather than pulled in from a header, so presumably they are defined
 * in another translation unit -- consider moving the prototypes to a shared
 * header so the compiler can check them against the definitions.
 */
128 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
129 static void si_pcie_gen3_enable(struct radeon_device *rdev);
130 static void si_program_aspm(struct radeon_device *rdev);
131 extern void sumo_rlc_fini(struct radeon_device *rdev);
132 extern int sumo_rlc_init(struct radeon_device *rdev);
133 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
134                                          bool enable);
135 static void si_init_pg(struct radeon_device *rdev);
136 static void si_init_cg(struct radeon_device *rdev);
137 static void si_fini_pg(struct radeon_device *rdev);
138 static void si_fini_cg(struct radeon_device *rdev);
139 static void si_rlc_stop(struct radeon_device *rdev);
140
/*
 * Lookup table of the six EVERGREEN_CRTCn_REGISTER_OFFSET constants
 * (n = 0..5); element i holds the constant for CRTC i.
 */
141 static const u32 crtc_offsets[] =
142 {
143         EVERGREEN_CRTC0_REGISTER_OFFSET,
144         EVERGREEN_CRTC1_REGISTER_OFFSET,
145         EVERGREEN_CRTC2_REGISTER_OFFSET,
146         EVERGREEN_CRTC3_REGISTER_OFFSET,
147         EVERGREEN_CRTC4_REGISTER_OFFSET,
148         EVERGREEN_CRTC5_REGISTER_OFFSET
149 };
150
/*
 * Lookup table of the six display interrupt status registers, in the order
 * DISP_INTERRUPT_STATUS, then _CONTINUE, _CONTINUE2 ... _CONTINUE5.
 * NOTE(review): presumably indexed by display/CRTC number like
 * crtc_offsets[] -- confirm against the interrupt handling code.
 */
151 static const u32 si_disp_int_status[] =
152 {
153         DISP_INTERRUPT_STATUS,
154         DISP_INTERRUPT_STATUS_CONTINUE,
155         DISP_INTERRUPT_STATUS_CONTINUE2,
156         DISP_INTERRUPT_STATUS_CONTINUE3,
157         DISP_INTERRUPT_STATUS_CONTINUE4,
158         DISP_INTERRUPT_STATUS_CONTINUE5
159 };
160
/*
 * Per-pin HPD register addresses: the HPD1 register plus a stride of 0xc
 * per pin.  @x is the zero-based pin index, so x == 0 yields the HPD1
 * register itself.
 *
 * The parameter is parenthesized so that an expression argument such as
 * DC_HPDx_CONTROL(i + 1) scales the whole index instead of only its last
 * operand.
 */
#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + ((x) * 0xc))
#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + ((x) * 0xc))
#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + ((x) * 0xc))
164
/*
 * Verde RLC save/restore register list.
 *
 * Almost every entry is a pair of words: a header written as
 * (sel << 16) | (reg >> 2), followed by 0x00000000.  A lone 0x3 word sits
 * after the 0xac54 pair, and the list ends with one extra 0x00000000.
 *
 * NOTE(review): presumably the upper half-word selects the target
 * instance/broadcast, the lower half-word is the register offset in dwords,
 * the zero words are placeholders for saved values and the trailing zero is
 * a terminator -- confirm against the RLC setup code that consumes this
 * list (not visible here).
 */
165 static const u32 verde_rlc_save_restore_register_list[] =
166 {
167         (0x8000 << 16) | (0x98f4 >> 2),
168         0x00000000,
169         (0x8040 << 16) | (0x98f4 >> 2),
170         0x00000000,
171         (0x8000 << 16) | (0xe80 >> 2),
172         0x00000000,
173         (0x8040 << 16) | (0xe80 >> 2),
174         0x00000000,
175         (0x8000 << 16) | (0x89bc >> 2),
176         0x00000000,
177         (0x8040 << 16) | (0x89bc >> 2),
178         0x00000000,
179         (0x8000 << 16) | (0x8c1c >> 2),
180         0x00000000,
181         (0x8040 << 16) | (0x8c1c >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x98f0 >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0xe7c >> 2),
186         0x00000000,
187         (0x8000 << 16) | (0x9148 >> 2),
188         0x00000000,
189         (0x8040 << 16) | (0x9148 >> 2),
190         0x00000000,
191         (0x9c00 << 16) | (0x9150 >> 2),
192         0x00000000,
193         (0x9c00 << 16) | (0x897c >> 2),
194         0x00000000,
195         (0x9c00 << 16) | (0x8d8c >> 2),
196         0x00000000,
197         (0x9c00 << 16) | (0xac54 >> 2),
198         0X00000000,
199         0x3,
200         (0x9c00 << 16) | (0x98f8 >> 2),
201         0x00000000,
202         (0x9c00 << 16) | (0x9910 >> 2),
203         0x00000000,
204         (0x9c00 << 16) | (0x9914 >> 2),
205         0x00000000,
206         (0x9c00 << 16) | (0x9918 >> 2),
207         0x00000000,
208         (0x9c00 << 16) | (0x991c >> 2),
209         0x00000000,
210         (0x9c00 << 16) | (0x9920 >> 2),
211         0x00000000,
212         (0x9c00 << 16) | (0x9924 >> 2),
213         0x00000000,
214         (0x9c00 << 16) | (0x9928 >> 2),
215         0x00000000,
216         (0x9c00 << 16) | (0x992c >> 2),
217         0x00000000,
218         (0x9c00 << 16) | (0x9930 >> 2),
219         0x00000000,
220         (0x9c00 << 16) | (0x9934 >> 2),
221         0x00000000,
222         (0x9c00 << 16) | (0x9938 >> 2),
223         0x00000000,
224         (0x9c00 << 16) | (0x993c >> 2),
225         0x00000000,
226         (0x9c00 << 16) | (0x9940 >> 2),
227         0x00000000,
228         (0x9c00 << 16) | (0x9944 >> 2),
229         0x00000000,
230         (0x9c00 << 16) | (0x9948 >> 2),
231         0x00000000,
232         (0x9c00 << 16) | (0x994c >> 2),
233         0x00000000,
234         (0x9c00 << 16) | (0x9950 >> 2),
235         0x00000000,
236         (0x9c00 << 16) | (0x9954 >> 2),
237         0x00000000,
238         (0x9c00 << 16) | (0x9958 >> 2),
239         0x00000000,
240         (0x9c00 << 16) | (0x995c >> 2),
241         0x00000000,
242         (0x9c00 << 16) | (0x9960 >> 2),
243         0x00000000,
244         (0x9c00 << 16) | (0x9964 >> 2),
245         0x00000000,
246         (0x9c00 << 16) | (0x9968 >> 2),
247         0x00000000,
248         (0x9c00 << 16) | (0x996c >> 2),
249         0x00000000,
250         (0x9c00 << 16) | (0x9970 >> 2),
251         0x00000000,
252         (0x9c00 << 16) | (0x9974 >> 2),
253         0x00000000,
254         (0x9c00 << 16) | (0x9978 >> 2),
255         0x00000000,
256         (0x9c00 << 16) | (0x997c >> 2),
257         0x00000000,
258         (0x9c00 << 16) | (0x9980 >> 2),
259         0x00000000,
260         (0x9c00 << 16) | (0x9984 >> 2),
261         0x00000000,
262         (0x9c00 << 16) | (0x9988 >> 2),
263         0x00000000,
264         (0x9c00 << 16) | (0x998c >> 2),
265         0x00000000,
266         (0x9c00 << 16) | (0x8c00 >> 2),
267         0x00000000,
268         (0x9c00 << 16) | (0x8c14 >> 2),
269         0x00000000,
270         (0x9c00 << 16) | (0x8c04 >> 2),
271         0x00000000,
272         (0x9c00 << 16) | (0x8c08 >> 2),
273         0x00000000,
274         (0x8000 << 16) | (0x9b7c >> 2),
275         0x00000000,
276         (0x8040 << 16) | (0x9b7c >> 2),
277         0x00000000,
278         (0x8000 << 16) | (0xe84 >> 2),
279         0x00000000,
280         (0x8040 << 16) | (0xe84 >> 2),
281         0x00000000,
282         (0x8000 << 16) | (0x89c0 >> 2),
283         0x00000000,
284         (0x8040 << 16) | (0x89c0 >> 2),
285         0x00000000,
286         (0x8000 << 16) | (0x914c >> 2),
287         0x00000000,
288         (0x8040 << 16) | (0x914c >> 2),
289         0x00000000,
290         (0x8000 << 16) | (0x8c20 >> 2),
291         0x00000000,
292         (0x8040 << 16) | (0x8c20 >> 2),
293         0x00000000,
294         (0x8000 << 16) | (0x9354 >> 2),
295         0x00000000,
296         (0x8040 << 16) | (0x9354 >> 2),
297         0x00000000,
298         (0x9c00 << 16) | (0x9060 >> 2),
299         0x00000000,
300         (0x9c00 << 16) | (0x9364 >> 2),
301         0x00000000,
302         (0x9c00 << 16) | (0x9100 >> 2),
303         0x00000000,
304         (0x9c00 << 16) | (0x913c >> 2),
305         0x00000000,
306         (0x8000 << 16) | (0x90e0 >> 2),
307         0x00000000,
308         (0x8000 << 16) | (0x90e4 >> 2),
309         0x00000000,
310         (0x8000 << 16) | (0x90e8 >> 2),
311         0x00000000,
312         (0x8040 << 16) | (0x90e0 >> 2),
313         0x00000000,
314         (0x8040 << 16) | (0x90e4 >> 2),
315         0x00000000,
316         (0x8040 << 16) | (0x90e8 >> 2),
317         0x00000000,
318         (0x9c00 << 16) | (0x8bcc >> 2),
319         0x00000000,
320         (0x9c00 << 16) | (0x8b24 >> 2),
321         0x00000000,
322         (0x9c00 << 16) | (0x88c4 >> 2),
323         0x00000000,
324         (0x9c00 << 16) | (0x8e50 >> 2),
325         0x00000000,
326         (0x9c00 << 16) | (0x8c0c >> 2),
327         0x00000000,
328         (0x9c00 << 16) | (0x8e58 >> 2),
329         0x00000000,
330         (0x9c00 << 16) | (0x8e5c >> 2),
331         0x00000000,
332         (0x9c00 << 16) | (0x9508 >> 2),
333         0x00000000,
334         (0x9c00 << 16) | (0x950c >> 2),
335         0x00000000,
336         (0x9c00 << 16) | (0x9494 >> 2),
337         0x00000000,
338         (0x9c00 << 16) | (0xac0c >> 2),
339         0x00000000,
340         (0x9c00 << 16) | (0xac10 >> 2),
341         0x00000000,
342         (0x9c00 << 16) | (0xac14 >> 2),
343         0x00000000,
344         (0x9c00 << 16) | (0xae00 >> 2),
345         0x00000000,
346         (0x9c00 << 16) | (0xac08 >> 2),
347         0x00000000,
348         (0x9c00 << 16) | (0x88d4 >> 2),
349         0x00000000,
350         (0x9c00 << 16) | (0x88c8 >> 2),
351         0x00000000,
352         (0x9c00 << 16) | (0x88cc >> 2),
353         0x00000000,
354         (0x9c00 << 16) | (0x89b0 >> 2),
355         0x00000000,
356         (0x9c00 << 16) | (0x8b10 >> 2),
357         0x00000000,
358         (0x9c00 << 16) | (0x8a14 >> 2),
359         0x00000000,
360         (0x9c00 << 16) | (0x9830 >> 2),
361         0x00000000,
362         (0x9c00 << 16) | (0x9834 >> 2),
363         0x00000000,
364         (0x9c00 << 16) | (0x9838 >> 2),
365         0x00000000,
366         (0x9c00 << 16) | (0x9a10 >> 2),
367         0x00000000,
368         (0x8000 << 16) | (0x9870 >> 2),
369         0x00000000,
370         (0x8000 << 16) | (0x9874 >> 2),
371         0x00000000,
372         (0x8001 << 16) | (0x9870 >> 2),
373         0x00000000,
374         (0x8001 << 16) | (0x9874 >> 2),
375         0x00000000,
376         (0x8040 << 16) | (0x9870 >> 2),
377         0x00000000,
378         (0x8040 << 16) | (0x9874 >> 2),
379         0x00000000,
380         (0x8041 << 16) | (0x9870 >> 2),
381         0x00000000,
382         (0x8041 << 16) | (0x9874 >> 2),
383         0x00000000,
384         0x00000000
385 };
386
/*
 * Tahiti "golden" RLC register settings, three u32 values per row.
 * Every row here uses 0xffffffff as its middle value.
 * NOTE(review): presumably each row is (register offset, mask, value) for a
 * masked register write -- confirm against the code that consumes this
 * table (not visible here).
 */
387 static const u32 tahiti_golden_rlc_registers[] =
388 {
389         0xc424, 0xffffffff, 0x00601005,
390         0xc47c, 0xffffffff, 0x10104040,
391         0xc488, 0xffffffff, 0x0100000a,
392         0xc314, 0xffffffff, 0x00000800,
393         0xc30c, 0xffffffff, 0x800000f4,
394         0xf4a8, 0xffffffff, 0x00000000
395 };
396
/*
 * Tahiti "golden" register settings, three u32 values per row.
 * NOTE(review): presumably each row is (register offset, mask, value) for a
 * masked register write -- confirm against the code that consumes this
 * table (not visible here).
 */
397 static const u32 tahiti_golden_registers[] =
398 {
399         0x9a10, 0x00010000, 0x00018208,
400         0x9830, 0xffffffff, 0x00000000,
401         0x9834, 0xf00fffff, 0x00000400,
402         0x9838, 0x0002021c, 0x00020200,
403         0xc78, 0x00000080, 0x00000000,
404         0xd030, 0x000300c0, 0x00800040,
405         0xd830, 0x000300c0, 0x00800040,
406         0x5bb0, 0x000000f0, 0x00000070,
407         0x5bc0, 0x00200000, 0x50100000,
408         0x7030, 0x31000311, 0x00000011,
409         0x277c, 0x00000003, 0x000007ff,
410         0x240c, 0x000007ff, 0x00000000,
411         0x8a14, 0xf000001f, 0x00000007,
412         0x8b24, 0xffffffff, 0x00ffffff,
413         0x8b10, 0x0000ff0f, 0x00000000,
414         0x28a4c, 0x07ffffff, 0x4e000000,
415         0x28350, 0x3f3f3fff, 0x2a00126a,
416         0x30, 0x000000ff, 0x0040,
417         0x34, 0x00000040, 0x00004040,
418         0x9100, 0x07ffffff, 0x03000000,
419         0x8e88, 0x01ff1f3f, 0x00000000,
420         0x8e84, 0x01ff1f3f, 0x00000000,
421         0x9060, 0x0000007f, 0x00000020,
422         0x9508, 0x00010000, 0x00010000,
423         0xac14, 0x00000200, 0x000002fb,
424         0xac10, 0xffffffff, 0x0000543b,
425         0xac0c, 0xffffffff, 0xa9210876,
426         0x88d0, 0xffffffff, 0x000fff40,
427         0x88d4, 0x0000001f, 0x00000010,
428         0x1410, 0x20000000, 0x20fffed8,
429         0x15c0, 0x000c0fc0, 0x000c0400
430 };
431
/*
 * Second Tahiti "golden" register table; it holds a single three-word row.
 * NOTE(review): presumably (register offset, mask, value), kept separate
 * from tahiti_golden_registers[] so it can be applied independently --
 * confirm against the consumer (not visible here).
 */
432 static const u32 tahiti_golden_registers2[] =
433 {
434         0xc64, 0x00000001, 0x00000001
435 };
436
/*
 * Pitcairn "golden" RLC register settings, three u32 values per row.
 * Every row here uses 0xffffffff as its middle value.
 * NOTE(review): presumably each row is (register offset, mask, value) --
 * confirm against the code that consumes this table (not visible here).
 */
437 static const u32 pitcairn_golden_rlc_registers[] =
438 {
439         0xc424, 0xffffffff, 0x00601004,
440         0xc47c, 0xffffffff, 0x10102020,
441         0xc488, 0xffffffff, 0x01000020,
442         0xc314, 0xffffffff, 0x00000800,
443         0xc30c, 0xffffffff, 0x800000a4
444 };
445
/*
 * Pitcairn "golden" register settings, three u32 values per row.
 * NOTE(review): presumably each row is (register offset, mask, value) for a
 * masked register write -- confirm against the code that consumes this
 * table (not visible here).
 */
446 static const u32 pitcairn_golden_registers[] =
447 {
448         0x9a10, 0x00010000, 0x00018208,
449         0x9830, 0xffffffff, 0x00000000,
450         0x9834, 0xf00fffff, 0x00000400,
451         0x9838, 0x0002021c, 0x00020200,
452         0xc78, 0x00000080, 0x00000000,
453         0xd030, 0x000300c0, 0x00800040,
454         0xd830, 0x000300c0, 0x00800040,
455         0x5bb0, 0x000000f0, 0x00000070,
456         0x5bc0, 0x00200000, 0x50100000,
457         0x7030, 0x31000311, 0x00000011,
458         0x2ae4, 0x00073ffe, 0x000022a2,
459         0x240c, 0x000007ff, 0x00000000,
460         0x8a14, 0xf000001f, 0x00000007,
461         0x8b24, 0xffffffff, 0x00ffffff,
462         0x8b10, 0x0000ff0f, 0x00000000,
463         0x28a4c, 0x07ffffff, 0x4e000000,
464         0x28350, 0x3f3f3fff, 0x2a00126a,
465         0x30, 0x000000ff, 0x0040,
466         0x34, 0x00000040, 0x00004040,
467         0x9100, 0x07ffffff, 0x03000000,
468         0x9060, 0x0000007f, 0x00000020,
469         0x9508, 0x00010000, 0x00010000,
470         0xac14, 0x000003ff, 0x000000f7,
471         0xac10, 0xffffffff, 0x00000000,
472         0xac0c, 0xffffffff, 0x32761054,
473         0x88d4, 0x0000001f, 0x00000010,
474         0x15c0, 0x000c0fc0, 0x000c0400
475 };
476
/*
 * Verde "golden" RLC register settings, three u32 values per row.
 * Every row here uses 0xffffffff as its middle value.
 * NOTE(review): presumably each row is (register offset, mask, value) --
 * confirm against the code that consumes this table (not visible here).
 */
477 static const u32 verde_golden_rlc_registers[] =
478 {
479         0xc424, 0xffffffff, 0x033f1005,
480         0xc47c, 0xffffffff, 0x10808020,
481         0xc488, 0xffffffff, 0x00800008,
482         0xc314, 0xffffffff, 0x00001000,
483         0xc30c, 0xffffffff, 0x80010014
484 };
485
/*
 * Verde "golden" register settings, three u32 values per row.
 *
 * Unlike the other golden tables in this file, many rows are repeated
 * verbatim two or three times in a row (e.g. 0xd030, 0x2ae4, 0x240c,
 * 0x8a14, 0x28350, 0x8e88, 0xac14).
 *
 * NOTE(review): presumably each row is (register offset, mask, value); the
 * repeats look redundant but may be deliberate -- verify against the
 * consumer (not visible here) before deduplicating.
 */
486 static const u32 verde_golden_registers[] =
487 {
488         0x9a10, 0x00010000, 0x00018208,
489         0x9830, 0xffffffff, 0x00000000,
490         0x9834, 0xf00fffff, 0x00000400,
491         0x9838, 0x0002021c, 0x00020200,
492         0xc78, 0x00000080, 0x00000000,
493         0xd030, 0x000300c0, 0x00800040,
494         0xd030, 0x000300c0, 0x00800040,
495         0xd830, 0x000300c0, 0x00800040,
496         0xd830, 0x000300c0, 0x00800040,
497         0x5bb0, 0x000000f0, 0x00000070,
498         0x5bc0, 0x00200000, 0x50100000,
499         0x7030, 0x31000311, 0x00000011,
500         0x2ae4, 0x00073ffe, 0x000022a2,
501         0x2ae4, 0x00073ffe, 0x000022a2,
502         0x2ae4, 0x00073ffe, 0x000022a2,
503         0x240c, 0x000007ff, 0x00000000,
504         0x240c, 0x000007ff, 0x00000000,
505         0x240c, 0x000007ff, 0x00000000,
506         0x8a14, 0xf000001f, 0x00000007,
507         0x8a14, 0xf000001f, 0x00000007,
508         0x8a14, 0xf000001f, 0x00000007,
509         0x8b24, 0xffffffff, 0x00ffffff,
510         0x8b10, 0x0000ff0f, 0x00000000,
511         0x28a4c, 0x07ffffff, 0x4e000000,
512         0x28350, 0x3f3f3fff, 0x0000124a,
513         0x28350, 0x3f3f3fff, 0x0000124a,
514         0x28350, 0x3f3f3fff, 0x0000124a,
515         0x30, 0x000000ff, 0x0040,
516         0x34, 0x00000040, 0x00004040,
517         0x9100, 0x07ffffff, 0x03000000,
518         0x9100, 0x07ffffff, 0x03000000,
519         0x8e88, 0x01ff1f3f, 0x00000000,
520         0x8e88, 0x01ff1f3f, 0x00000000,
521         0x8e88, 0x01ff1f3f, 0x00000000,
522         0x8e84, 0x01ff1f3f, 0x00000000,
523         0x8e84, 0x01ff1f3f, 0x00000000,
524         0x8e84, 0x01ff1f3f, 0x00000000,
525         0x9060, 0x0000007f, 0x00000020,
526         0x9508, 0x00010000, 0x00010000,
527         0xac14, 0x000003ff, 0x00000003,
528         0xac14, 0x000003ff, 0x00000003,
529         0xac14, 0x000003ff, 0x00000003,
530         0xac10, 0xffffffff, 0x00000000,
531         0xac10, 0xffffffff, 0x00000000,
532         0xac10, 0xffffffff, 0x00000000,
533         0xac0c, 0xffffffff, 0x00001032,
534         0xac0c, 0xffffffff, 0x00001032,
535         0xac0c, 0xffffffff, 0x00001032,
536         0x88d4, 0x0000001f, 0x00000010,
537         0x88d4, 0x0000001f, 0x00000010,
538         0x88d4, 0x0000001f, 0x00000010,
539         0x15c0, 0x000c0fc0, 0x000c0400
540 };
541
/*
 * Oland "golden" RLC register settings, three u32 values per row.
 * Every row here uses 0xffffffff as its middle value.
 * NOTE(review): presumably each row is (register offset, mask, value) --
 * confirm against the code that consumes this table (not visible here).
 */
542 static const u32 oland_golden_rlc_registers[] =
543 {
544         0xc424, 0xffffffff, 0x00601005,
545         0xc47c, 0xffffffff, 0x10104040,
546         0xc488, 0xffffffff, 0x0100000a,
547         0xc314, 0xffffffff, 0x00000800,
548         0xc30c, 0xffffffff, 0x800000f4
549 };
550
/*
 * Oland "golden" register settings, three u32 values per row.
 * NOTE(review): presumably each row is (register offset, mask, value) for a
 * masked register write -- confirm against the code that consumes this
 * table (not visible here).
 */
551 static const u32 oland_golden_registers[] =
552 {
553         0x9a10, 0x00010000, 0x00018208,
554         0x9830, 0xffffffff, 0x00000000,
555         0x9834, 0xf00fffff, 0x00000400,
556         0x9838, 0x0002021c, 0x00020200,
557         0xc78, 0x00000080, 0x00000000,
558         0xd030, 0x000300c0, 0x00800040,
559         0xd830, 0x000300c0, 0x00800040,
560         0x5bb0, 0x000000f0, 0x00000070,
561         0x5bc0, 0x00200000, 0x50100000,
562         0x7030, 0x31000311, 0x00000011,
563         0x2ae4, 0x00073ffe, 0x000022a2,
564         0x240c, 0x000007ff, 0x00000000,
565         0x8a14, 0xf000001f, 0x00000007,
566         0x8b24, 0xffffffff, 0x00ffffff,
567         0x8b10, 0x0000ff0f, 0x00000000,
568         0x28a4c, 0x07ffffff, 0x4e000000,
569         0x28350, 0x3f3f3fff, 0x00000082,
570         0x30, 0x000000ff, 0x0040,
571         0x34, 0x00000040, 0x00004040,
572         0x9100, 0x07ffffff, 0x03000000,
573         0x9060, 0x0000007f, 0x00000020,
574         0x9508, 0x00010000, 0x00010000,
575         0xac14, 0x000003ff, 0x000000f3,
576         0xac10, 0xffffffff, 0x00000000,
577         0xac0c, 0xffffffff, 0x00003210,
578         0x88d4, 0x0000001f, 0x00000010,
579         0x15c0, 0x000c0fc0, 0x000c0400
580 };
581
/*
 * Hainan "golden" register settings, three u32 values per row.
 * Compared with the other chips' golden tables in this file, this one has
 * no 0xc78, 0x5bb0, 0x5bc0 or 0x7030 rows and instead carries 0xd0c0 and
 * 0xd8c0 rows.
 * NOTE(review): presumably each row is (register offset, mask, value) for a
 * masked register write -- confirm against the code that consumes this
 * table (not visible here).
 */
582 static const u32 hainan_golden_registers[] =
583 {
584         0x9a10, 0x00010000, 0x00018208,
585         0x9830, 0xffffffff, 0x00000000,
586         0x9834, 0xf00fffff, 0x00000400,
587         0x9838, 0x0002021c, 0x00020200,
588         0xd0c0, 0xff000fff, 0x00000100,
589         0xd030, 0x000300c0, 0x00800040,
590         0xd8c0, 0xff000fff, 0x00000100,
591         0xd830, 0x000300c0, 0x00800040,
592         0x2ae4, 0x00073ffe, 0x000022a2,
593         0x240c, 0x000007ff, 0x00000000,
594         0x8a14, 0xf000001f, 0x00000007,
595         0x8b24, 0xffffffff, 0x00ffffff,
596         0x8b10, 0x0000ff0f, 0x00000000,
597         0x28a4c, 0x07ffffff, 0x4e000000,
598         0x28350, 0x3f3f3fff, 0x00000000,
599         0x30, 0x000000ff, 0x0040,
600         0x34, 0x00000040, 0x00004040,
601         0x9100, 0x03e00000, 0x03600000,
602         0x9060, 0x0000007f, 0x00000020,
603         0x9508, 0x00010000, 0x00010000,
604         0xac14, 0x000003ff, 0x000000f1,
605         0xac10, 0xffffffff, 0x00000000,
606         0xac0c, 0xffffffff, 0x00003210,
607         0x88d4, 0x0000001f, 0x00000010,
608         0x15c0, 0x000c0fc0, 0x000c0400
609 };
610
/*
 * Second Hainan "golden" register table; it holds a single three-word row.
 * NOTE(review): presumably (register offset, mask, value), kept separate
 * from hainan_golden_registers[] so it can be applied independently --
 * confirm against the consumer (not visible here).
 */
611 static const u32 hainan_golden_registers2[] =
612 {
613         0x98f8, 0xffffffff, 0x02010001
614 };
615
/*
 * Tahiti MGCG/CGCG init table, three u32 values per row.
 *
 * The 0x802c row appears twice with identical contents: once near the top
 * and once immediately before the contiguous 0x9160..0x929c run.
 *
 * NOTE(review): presumably each row is (register offset, mask, value) and
 * the table programs clock-gating defaults, as the name suggests -- confirm
 * against the code that consumes this table (not visible here).
 */
616 static const u32 tahiti_mgcg_cgcg_init[] =
617 {
618         0xc400, 0xffffffff, 0xfffffffc,
619         0x802c, 0xffffffff, 0xe0000000,
620         0x9a60, 0xffffffff, 0x00000100,
621         0x92a4, 0xffffffff, 0x00000100,
622         0xc164, 0xffffffff, 0x00000100,
623         0x9774, 0xffffffff, 0x00000100,
624         0x8984, 0xffffffff, 0x06000100,
625         0x8a18, 0xffffffff, 0x00000100,
626         0x92a0, 0xffffffff, 0x00000100,
627         0xc380, 0xffffffff, 0x00000100,
628         0x8b28, 0xffffffff, 0x00000100,
629         0x9144, 0xffffffff, 0x00000100,
630         0x8d88, 0xffffffff, 0x00000100,
631         0x8d8c, 0xffffffff, 0x00000100,
632         0x9030, 0xffffffff, 0x00000100,
633         0x9034, 0xffffffff, 0x00000100,
634         0x9038, 0xffffffff, 0x00000100,
635         0x903c, 0xffffffff, 0x00000100,
636         0xad80, 0xffffffff, 0x00000100,
637         0xac54, 0xffffffff, 0x00000100,
638         0x897c, 0xffffffff, 0x06000100,
639         0x9868, 0xffffffff, 0x00000100,
640         0x9510, 0xffffffff, 0x00000100,
641         0xaf04, 0xffffffff, 0x00000100,
642         0xae04, 0xffffffff, 0x00000100,
643         0x949c, 0xffffffff, 0x00000100,
644         0x802c, 0xffffffff, 0xe0000000,
645         0x9160, 0xffffffff, 0x00010000,
646         0x9164, 0xffffffff, 0x00030002,
647         0x9168, 0xffffffff, 0x00040007,
648         0x916c, 0xffffffff, 0x00060005,
649         0x9170, 0xffffffff, 0x00090008,
650         0x9174, 0xffffffff, 0x00020001,
651         0x9178, 0xffffffff, 0x00040003,
652         0x917c, 0xffffffff, 0x00000007,
653         0x9180, 0xffffffff, 0x00060005,
654         0x9184, 0xffffffff, 0x00090008,
655         0x9188, 0xffffffff, 0x00030002,
656         0x918c, 0xffffffff, 0x00050004,
657         0x9190, 0xffffffff, 0x00000008,
658         0x9194, 0xffffffff, 0x00070006,
659         0x9198, 0xffffffff, 0x000a0009,
660         0x919c, 0xffffffff, 0x00040003,
661         0x91a0, 0xffffffff, 0x00060005,
662         0x91a4, 0xffffffff, 0x00000009,
663         0x91a8, 0xffffffff, 0x00080007,
664         0x91ac, 0xffffffff, 0x000b000a,
665         0x91b0, 0xffffffff, 0x00050004,
666         0x91b4, 0xffffffff, 0x00070006,
667         0x91b8, 0xffffffff, 0x0008000b,
668         0x91bc, 0xffffffff, 0x000a0009,
669         0x91c0, 0xffffffff, 0x000d000c,
670         0x91c4, 0xffffffff, 0x00060005,
671         0x91c8, 0xffffffff, 0x00080007,
672         0x91cc, 0xffffffff, 0x0000000b,
673         0x91d0, 0xffffffff, 0x000a0009,
674         0x91d4, 0xffffffff, 0x000d000c,
675         0x91d8, 0xffffffff, 0x00070006,
676         0x91dc, 0xffffffff, 0x00090008,
677         0x91e0, 0xffffffff, 0x0000000c,
678         0x91e4, 0xffffffff, 0x000b000a,
679         0x91e8, 0xffffffff, 0x000e000d,
680         0x91ec, 0xffffffff, 0x00080007,
681         0x91f0, 0xffffffff, 0x000a0009,
682         0x91f4, 0xffffffff, 0x0000000d,
683         0x91f8, 0xffffffff, 0x000c000b,
684         0x91fc, 0xffffffff, 0x000f000e,
685         0x9200, 0xffffffff, 0x00090008,
686         0x9204, 0xffffffff, 0x000b000a,
687         0x9208, 0xffffffff, 0x000c000f,
688         0x920c, 0xffffffff, 0x000e000d,
689         0x9210, 0xffffffff, 0x00110010,
690         0x9214, 0xffffffff, 0x000a0009,
691         0x9218, 0xffffffff, 0x000c000b,
692         0x921c, 0xffffffff, 0x0000000f,
693         0x9220, 0xffffffff, 0x000e000d,
694         0x9224, 0xffffffff, 0x00110010,
695         0x9228, 0xffffffff, 0x000b000a,
696         0x922c, 0xffffffff, 0x000d000c,
697         0x9230, 0xffffffff, 0x00000010,
698         0x9234, 0xffffffff, 0x000f000e,
699         0x9238, 0xffffffff, 0x00120011,
700         0x923c, 0xffffffff, 0x000c000b,
701         0x9240, 0xffffffff, 0x000e000d,
702         0x9244, 0xffffffff, 0x00000011,
703         0x9248, 0xffffffff, 0x0010000f,
704         0x924c, 0xffffffff, 0x00130012,
705         0x9250, 0xffffffff, 0x000d000c,
706         0x9254, 0xffffffff, 0x000f000e,
707         0x9258, 0xffffffff, 0x00100013,
708         0x925c, 0xffffffff, 0x00120011,
709         0x9260, 0xffffffff, 0x00150014,
710         0x9264, 0xffffffff, 0x000e000d,
711         0x9268, 0xffffffff, 0x0010000f,
712         0x926c, 0xffffffff, 0x00000013,
713         0x9270, 0xffffffff, 0x00120011,
714         0x9274, 0xffffffff, 0x00150014,
715         0x9278, 0xffffffff, 0x000f000e,
716         0x927c, 0xffffffff, 0x00110010,
717         0x9280, 0xffffffff, 0x00000014,
718         0x9284, 0xffffffff, 0x00130012,
719         0x9288, 0xffffffff, 0x00160015,
720         0x928c, 0xffffffff, 0x0010000f,
721         0x9290, 0xffffffff, 0x00120011,
722         0x9294, 0xffffffff, 0x00000015,
723         0x9298, 0xffffffff, 0x00140013,
724         0x929c, 0xffffffff, 0x00170016,
725         0x9150, 0xffffffff, 0x96940200,
726         0x8708, 0xffffffff, 0x00900100,
727         0xc478, 0xffffffff, 0x00000080,
728         0xc404, 0xffffffff, 0x0020003f,
729         0x30, 0xffffffff, 0x0000001c,
730         0x34, 0x000f0000, 0x000f0000,
731         0x160c, 0xffffffff, 0x00000100,
732         0x1024, 0xffffffff, 0x00000100,
733         0x102c, 0x00000101, 0x00000000,
734         0x20a8, 0xffffffff, 0x00000104,
735         0x264c, 0x000c0000, 0x000c0000,
736         0x2648, 0x000c0000, 0x000c0000,
737         0x55e4, 0xff000fff, 0x00000100,
738         0x55e8, 0x00000001, 0x00000001,
739         0x2f50, 0x00000001, 0x00000001,
740         0x30cc, 0xc0000fff, 0x00000104,
741         0xc1e4, 0x00000001, 0x00000001,
742         0xd0c0, 0xfffffff0, 0x00000100,
743         0xd8c0, 0xfffffff0, 0x00000100
744 };
745
/*
 * Pitcairn MGCG/CGCG init table, three u32 values per row.
 *
 * The 0x802c row appears twice with identical contents.  The 0x9160.. run
 * is not contiguous here: it goes 0x9160..0x91c0, skips to 0x9200 and ends
 * at 0x9260.  There are also no 0x264c / 0x2648 rows.
 *
 * NOTE(review): presumably each row is (register offset, mask, value) and
 * the gaps reflect blocks this chip does not have -- confirm against the
 * code that consumes this table (not visible here).
 */
746 static const u32 pitcairn_mgcg_cgcg_init[] =
747 {
748         0xc400, 0xffffffff, 0xfffffffc,
749         0x802c, 0xffffffff, 0xe0000000,
750         0x9a60, 0xffffffff, 0x00000100,
751         0x92a4, 0xffffffff, 0x00000100,
752         0xc164, 0xffffffff, 0x00000100,
753         0x9774, 0xffffffff, 0x00000100,
754         0x8984, 0xffffffff, 0x06000100,
755         0x8a18, 0xffffffff, 0x00000100,
756         0x92a0, 0xffffffff, 0x00000100,
757         0xc380, 0xffffffff, 0x00000100,
758         0x8b28, 0xffffffff, 0x00000100,
759         0x9144, 0xffffffff, 0x00000100,
760         0x8d88, 0xffffffff, 0x00000100,
761         0x8d8c, 0xffffffff, 0x00000100,
762         0x9030, 0xffffffff, 0x00000100,
763         0x9034, 0xffffffff, 0x00000100,
764         0x9038, 0xffffffff, 0x00000100,
765         0x903c, 0xffffffff, 0x00000100,
766         0xad80, 0xffffffff, 0x00000100,
767         0xac54, 0xffffffff, 0x00000100,
768         0x897c, 0xffffffff, 0x06000100,
769         0x9868, 0xffffffff, 0x00000100,
770         0x9510, 0xffffffff, 0x00000100,
771         0xaf04, 0xffffffff, 0x00000100,
772         0xae04, 0xffffffff, 0x00000100,
773         0x949c, 0xffffffff, 0x00000100,
774         0x802c, 0xffffffff, 0xe0000000,
775         0x9160, 0xffffffff, 0x00010000,
776         0x9164, 0xffffffff, 0x00030002,
777         0x9168, 0xffffffff, 0x00040007,
778         0x916c, 0xffffffff, 0x00060005,
779         0x9170, 0xffffffff, 0x00090008,
780         0x9174, 0xffffffff, 0x00020001,
781         0x9178, 0xffffffff, 0x00040003,
782         0x917c, 0xffffffff, 0x00000007,
783         0x9180, 0xffffffff, 0x00060005,
784         0x9184, 0xffffffff, 0x00090008,
785         0x9188, 0xffffffff, 0x00030002,
786         0x918c, 0xffffffff, 0x00050004,
787         0x9190, 0xffffffff, 0x00000008,
788         0x9194, 0xffffffff, 0x00070006,
789         0x9198, 0xffffffff, 0x000a0009,
790         0x919c, 0xffffffff, 0x00040003,
791         0x91a0, 0xffffffff, 0x00060005,
792         0x91a4, 0xffffffff, 0x00000009,
793         0x91a8, 0xffffffff, 0x00080007,
794         0x91ac, 0xffffffff, 0x000b000a,
795         0x91b0, 0xffffffff, 0x00050004,
796         0x91b4, 0xffffffff, 0x00070006,
797         0x91b8, 0xffffffff, 0x0008000b,
798         0x91bc, 0xffffffff, 0x000a0009,
799         0x91c0, 0xffffffff, 0x000d000c,
800         0x9200, 0xffffffff, 0x00090008,
801         0x9204, 0xffffffff, 0x000b000a,
802         0x9208, 0xffffffff, 0x000c000f,
803         0x920c, 0xffffffff, 0x000e000d,
804         0x9210, 0xffffffff, 0x00110010,
805         0x9214, 0xffffffff, 0x000a0009,
806         0x9218, 0xffffffff, 0x000c000b,
807         0x921c, 0xffffffff, 0x0000000f,
808         0x9220, 0xffffffff, 0x000e000d,
809         0x9224, 0xffffffff, 0x00110010,
810         0x9228, 0xffffffff, 0x000b000a,
811         0x922c, 0xffffffff, 0x000d000c,
812         0x9230, 0xffffffff, 0x00000010,
813         0x9234, 0xffffffff, 0x000f000e,
814         0x9238, 0xffffffff, 0x00120011,
815         0x923c, 0xffffffff, 0x000c000b,
816         0x9240, 0xffffffff, 0x000e000d,
817         0x9244, 0xffffffff, 0x00000011,
818         0x9248, 0xffffffff, 0x0010000f,
819         0x924c, 0xffffffff, 0x00130012,
820         0x9250, 0xffffffff, 0x000d000c,
821         0x9254, 0xffffffff, 0x000f000e,
822         0x9258, 0xffffffff, 0x00100013,
823         0x925c, 0xffffffff, 0x00120011,
824         0x9260, 0xffffffff, 0x00150014,
825         0x9150, 0xffffffff, 0x96940200,
826         0x8708, 0xffffffff, 0x00900100,
827         0xc478, 0xffffffff, 0x00000080,
828         0xc404, 0xffffffff, 0x0020003f,
829         0x30, 0xffffffff, 0x0000001c,
830         0x34, 0x000f0000, 0x000f0000,
831         0x160c, 0xffffffff, 0x00000100,
832         0x1024, 0xffffffff, 0x00000100,
833         0x102c, 0x00000101, 0x00000000,
834         0x20a8, 0xffffffff, 0x00000104,
835         0x55e4, 0xff000fff, 0x00000100,
836         0x55e8, 0x00000001, 0x00000001,
837         0x2f50, 0x00000001, 0x00000001,
838         0x30cc, 0xc0000fff, 0x00000104,
839         0xc1e4, 0x00000001, 0x00000001,
840         0xd0c0, 0xfffffff0, 0x00000100,
841         0xd8c0, 0xfffffff0, 0x00000100
842 };
843
/*
 * Register init sequence for Verde.  si_init_golden_registers() passes this
 * table to radeon_program_register_sequence() when rdev->family is CHIP_VERDE.
 *
 * The table is a flat list of three-word entries.
 * NOTE(review): presumably { register offset, mask, value }, and the name
 * suggests clock-gating (MGCG/CGCG) defaults -- confirm against
 * radeon_program_register_sequence().
 */
static const u32 verde_mgcg_cgcg_init[] = {
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100,
};
943
/*
 * Register init sequence for Oland.  si_init_golden_registers() passes this
 * table to radeon_program_register_sequence() when rdev->family is CHIP_OLAND.
 *
 * The table is a flat list of three-word entries.
 * NOTE(review): presumably { register offset, mask, value }, and the name
 * suggests clock-gating (MGCG/CGCG) defaults -- confirm against
 * radeon_program_register_sequence().
 */
static const u32 oland_mgcg_cgcg_init[] = {
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100,
};
1023
/*
 * Register init sequence for Hainan.  si_init_golden_registers() passes this
 * table to radeon_program_register_sequence() when rdev->family is
 * CHIP_HAINAN.
 *
 * The table is a flat list of three-word entries.
 * NOTE(review): presumably { register offset, mask, value }, and the name
 * suggests clock-gating (MGCG/CGCG) defaults -- confirm against
 * radeon_program_register_sequence().
 */
static const u32 hainan_mgcg_cgcg_init[] = {
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100,
};
1100
/*
 * Additional register init sequence for Verde.  si_init_golden_registers()
 * passes this table to radeon_program_register_sequence() after the other
 * Verde tables when rdev->family is CHIP_VERDE.
 *
 * The table is only ever read, so it is declared const like the sibling
 * init tables that are handed to the same function.
 *
 * The table is a flat list of three-word entries.
 * NOTE(review): presumably { register offset, mask, value }, and the name
 * suggests power-gating setup -- confirm against
 * radeon_program_register_sequence().
 */
static const u32 verde_pg_init[] = {
        0x353c, 0xffffffff, 0x40000,
        0x3538, 0xffffffff, 0x200010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x7007,
        0x3538, 0xffffffff, 0x300010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x400000,
        0x3538, 0xffffffff, 0x100010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x120200,
        0x3538, 0xffffffff, 0x500010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x1e1e16,
        0x3538, 0xffffffff, 0x600010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x171f1e,
        0x3538, 0xffffffff, 0x700010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x3538, 0xffffffff, 0x9ff,
        0x3500, 0xffffffff, 0x0,
        0x3504, 0xffffffff, 0x10000800,
        0x3504, 0xffffffff, 0xf,
        0x3504, 0xffffffff, 0xf,
        0x3500, 0xffffffff, 0x4,
        0x3504, 0xffffffff, 0x1000051e,
        0x3504, 0xffffffff, 0xffff,
        0x3504, 0xffffffff, 0xffff,
        0x3500, 0xffffffff, 0x8,
        0x3504, 0xffffffff, 0x80500,
        0x3500, 0xffffffff, 0x12,
        0x3504, 0xffffffff, 0x9050c,
        0x3500, 0xffffffff, 0x1d,
        0x3504, 0xffffffff, 0xb052c,
        0x3500, 0xffffffff, 0x2a,
        0x3504, 0xffffffff, 0x1053e,
        0x3500, 0xffffffff, 0x2d,
        0x3504, 0xffffffff, 0x10546,
        0x3500, 0xffffffff, 0x30,
        0x3504, 0xffffffff, 0xa054e,
        0x3500, 0xffffffff, 0x3c,
        0x3504, 0xffffffff, 0x1055f,
        0x3500, 0xffffffff, 0x3f,
        0x3504, 0xffffffff, 0x10567,
        0x3500, 0xffffffff, 0x42,
        0x3504, 0xffffffff, 0x1056f,
        0x3500, 0xffffffff, 0x45,
        0x3504, 0xffffffff, 0x10572,
        0x3500, 0xffffffff, 0x48,
        0x3504, 0xffffffff, 0x20575,
        0x3500, 0xffffffff, 0x4c,
        0x3504, 0xffffffff, 0x190801,
        0x3500, 0xffffffff, 0x67,
        0x3504, 0xffffffff, 0x1082a,
        0x3500, 0xffffffff, 0x6a,
        0x3504, 0xffffffff, 0x1b082d,
        0x3500, 0xffffffff, 0x87,
        0x3504, 0xffffffff, 0x310851,
        0x3500, 0xffffffff, 0xba,
        0x3504, 0xffffffff, 0x891,
        0x3500, 0xffffffff, 0xbc,
        0x3504, 0xffffffff, 0x893,
        0x3500, 0xffffffff, 0xbe,
        0x3504, 0xffffffff, 0x20895,
        0x3500, 0xffffffff, 0xc2,
        0x3504, 0xffffffff, 0x20899,
        0x3500, 0xffffffff, 0xc6,
        0x3504, 0xffffffff, 0x2089d,
        0x3500, 0xffffffff, 0xca,
        0x3504, 0xffffffff, 0x8a1,
        0x3500, 0xffffffff, 0xcc,
        0x3504, 0xffffffff, 0x8a3,
        0x3500, 0xffffffff, 0xce,
        0x3504, 0xffffffff, 0x308a5,
        0x3500, 0xffffffff, 0xd3,
        0x3504, 0xffffffff, 0x6d08cd,
        0x3500, 0xffffffff, 0x142,
        0x3504, 0xffffffff, 0x2000095a,
        0x3504, 0xffffffff, 0x1,
        0x3500, 0xffffffff, 0x144,
        0x3504, 0xffffffff, 0x301f095b,
        0x3500, 0xffffffff, 0x165,
        0x3504, 0xffffffff, 0xc094d,
        0x3500, 0xffffffff, 0x173,
        0x3504, 0xffffffff, 0xf096d,
        0x3500, 0xffffffff, 0x184,
        0x3504, 0xffffffff, 0x15097f,
        0x3500, 0xffffffff, 0x19b,
        0x3504, 0xffffffff, 0xc0998,
        0x3500, 0xffffffff, 0x1a9,
        0x3504, 0xffffffff, 0x409a7,
        0x3500, 0xffffffff, 0x1af,
        0x3504, 0xffffffff, 0xcdc,
        0x3500, 0xffffffff, 0x1b1,
        0x3504, 0xffffffff, 0x800,
        0x3508, 0xffffffff, 0x6c9b2000,
        0x3510, 0xfc00, 0x2000,
        0x3544, 0xffffffff, 0xfc0,
        0x28d4, 0x00000100, 0x100,
};
1227
1228 static void si_init_golden_registers(struct radeon_device *rdev)
1229 {
1230         switch (rdev->family) {
1231         case CHIP_TAHITI:
1232                 radeon_program_register_sequence(rdev,
1233                                                  tahiti_golden_registers,
1234                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1235                 radeon_program_register_sequence(rdev,
1236                                                  tahiti_golden_rlc_registers,
1237                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1238                 radeon_program_register_sequence(rdev,
1239                                                  tahiti_mgcg_cgcg_init,
1240                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1241                 radeon_program_register_sequence(rdev,
1242                                                  tahiti_golden_registers2,
1243                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1244                 break;
1245         case CHIP_PITCAIRN:
1246                 radeon_program_register_sequence(rdev,
1247                                                  pitcairn_golden_registers,
1248                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1249                 radeon_program_register_sequence(rdev,
1250                                                  pitcairn_golden_rlc_registers,
1251                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1252                 radeon_program_register_sequence(rdev,
1253                                                  pitcairn_mgcg_cgcg_init,
1254                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1255                 break;
1256         case CHIP_VERDE:
1257                 radeon_program_register_sequence(rdev,
1258                                                  verde_golden_registers,
1259                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1260                 radeon_program_register_sequence(rdev,
1261                                                  verde_golden_rlc_registers,
1262                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1263                 radeon_program_register_sequence(rdev,
1264                                                  verde_mgcg_cgcg_init,
1265                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1266                 radeon_program_register_sequence(rdev,
1267                                                  verde_pg_init,
1268                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1269                 break;
1270         case CHIP_OLAND:
1271                 radeon_program_register_sequence(rdev,
1272                                                  oland_golden_registers,
1273                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1274                 radeon_program_register_sequence(rdev,
1275                                                  oland_golden_rlc_registers,
1276                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1277                 radeon_program_register_sequence(rdev,
1278                                                  oland_mgcg_cgcg_init,
1279                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1280                 break;
1281         case CHIP_HAINAN:
1282                 radeon_program_register_sequence(rdev,
1283                                                  hainan_golden_registers,
1284                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1285                 radeon_program_register_sequence(rdev,
1286                                                  hainan_golden_registers2,
1287                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1288                 radeon_program_register_sequence(rdev,
1289                                                  hainan_mgcg_cgcg_init,
1290                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1291                 break;
1292         default:
1293                 break;
1294         }
1295 }
1296
1297 /**
1298  * si_get_allowed_info_register - fetch the register for the info ioctl
1299  *
1300  * @rdev: radeon_device pointer
1301  * @reg: register offset in bytes
1302  * @val: register value
1303  *
1304  * Returns 0 for success or -EINVAL for an invalid register
1305  *
1306  */
1307 int si_get_allowed_info_register(struct radeon_device *rdev,
1308                                  u32 reg, u32 *val)
1309 {
1310         switch (reg) {
1311         case GRBM_STATUS:
1312         case GRBM_STATUS2:
1313         case GRBM_STATUS_SE0:
1314         case GRBM_STATUS_SE1:
1315         case SRBM_STATUS:
1316         case SRBM_STATUS2:
1317         case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1318         case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1319         case UVD_STATUS:
1320                 *val = RREG32(reg);
1321                 return 0;
1322         default:
1323                 return -EINVAL;
1324         }
1325 }
1326
1327 #define PCIE_BUS_CLK                10000
1328 #define TCLK                        (PCIE_BUS_CLK / 10)
1329
1330 /**
1331  * si_get_xclk - get the xclk
1332  *
1333  * @rdev: radeon_device pointer
1334  *
1335  * Returns the reference clock used by the gfx engine
1336  * (SI).
1337  */
1338 u32 si_get_xclk(struct radeon_device *rdev)
1339 {
1340         u32 reference_clock = rdev->clock.spll.reference_freq;
1341         u32 tmp;
1342
1343         tmp = RREG32(CG_CLKPIN_CNTL_2);
1344         if (tmp & MUX_TCLK_TO_XCLK)
1345                 return TCLK;
1346
1347         tmp = RREG32(CG_CLKPIN_CNTL);
1348         if (tmp & XTALIN_DIVIDE)
1349                 return reference_clock / 4;
1350
1351         return reference_clock;
1352 }
1353
1354 /* get temperature in millidegrees */
1355 int si_get_temp(struct radeon_device *rdev)
1356 {
1357         u32 temp;
1358         int actual_temp = 0;
1359
1360         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1361                 CTF_TEMP_SHIFT;
1362
1363         if (temp & 0x200)
1364                 actual_temp = 255;
1365         else
1366                 actual_temp = temp & 0x1ff;
1367
1368         actual_temp = (actual_temp * 1000);
1369
1370         return actual_temp;
1371 }
1372
/* Number of two-word entries in each of the *_io_mc_regs tables. */
#define TAHITI_IO_MC_REGS_SIZE 36

/*
 * MC IO register table for Tahiti.  si_mc_load_microcode() selects it for
 * CHIP_TAHITI when rdev->new_fw is not set.
 * NOTE(review): each entry is presumably { MC IO debug index, data } --
 * confirm against the load loop in si_mc_load_microcode().
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        { 0x0000006f, 0x03044000 },
        { 0x00000070, 0x0480c018 },
        { 0x00000071, 0x00000040 },
        { 0x00000072, 0x01000000 },
        { 0x00000074, 0x000000ff },
        { 0x00000075, 0x00143400 },
        { 0x00000076, 0x08ec0800 },
        { 0x00000077, 0x040000cc },
        { 0x00000079, 0x00000000 },
        { 0x0000007a, 0x21000409 },
        { 0x0000007c, 0x00000000 },
        { 0x0000007d, 0xe8000000 },
        { 0x0000007e, 0x044408a8 },
        { 0x0000007f, 0x00000003 },
        { 0x00000080, 0x00000000 },
        { 0x00000081, 0x01000000 },
        { 0x00000082, 0x02000000 },
        { 0x00000083, 0x00000000 },
        { 0x00000084, 0xe3f3e4f4 },
        { 0x00000085, 0x00052024 },
        { 0x00000087, 0x00000000 },
        { 0x00000088, 0x66036603 },
        { 0x00000089, 0x01000000 },
        { 0x0000008b, 0x1c0a0000 },
        { 0x0000008c, 0xff010000 },
        { 0x0000008e, 0xffffefff },
        { 0x0000008f, 0xfff3efff },
        { 0x00000090, 0xfff3efbf },
        { 0x00000094, 0x00101101 },
        { 0x00000095, 0x00000fff },
        { 0x00000096, 0x00116fff },
        { 0x00000097, 0x60010000 },
        { 0x00000098, 0x10010000 },
        { 0x00000099, 0x00006000 },
        { 0x0000009a, 0x00001000 },
        { 0x0000009f, 0x00a77400 },
};
1413
1414 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1415         {0x0000006f, 0x03044000},
1416         {0x00000070, 0x0480c018},
1417         {0x00000071, 0x00000040},
1418         {0x00000072, 0x01000000},
1419         {0x00000074, 0x000000ff},
1420         {0x00000075, 0x00143400},
1421         {0x00000076, 0x08ec0800},
1422         {0x00000077, 0x040000cc},
1423         {0x00000079, 0x00000000},
1424         {0x0000007a, 0x21000409},
1425         {0x0000007c, 0x00000000},
1426         {0x0000007d, 0xe8000000},
1427         {0x0000007e, 0x044408a8},
1428         {0x0000007f, 0x00000003},
1429         {0x00000080, 0x00000000},
1430         {0x00000081, 0x01000000},
1431         {0x00000082, 0x02000000},
1432         {0x00000083, 0x00000000},
1433         {0x00000084, 0xe3f3e4f4},
1434         {0x00000085, 0x00052024},
1435         {0x00000087, 0x00000000},
1436         {0x00000088, 0x66036603},
1437         {0x00000089, 0x01000000},
1438         {0x0000008b, 0x1c0a0000},
1439         {0x0000008c, 0xff010000},
1440         {0x0000008e, 0xffffefff},
1441         {0x0000008f, 0xfff3efff},
1442         {0x00000090, 0xfff3efbf},
1443         {0x00000094, 0x00101101},
1444         {0x00000095, 0x00000fff},
1445         {0x00000096, 0x00116fff},
1446         {0x00000097, 0x60010000},
1447         {0x00000098, 0x10010000},
1448         {0x00000099, 0x00006000},
1449         {0x0000009a, 0x00001000},
1450         {0x0000009f, 0x00a47400}
1451 };
1452
1453 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1454         {0x0000006f, 0x03044000},
1455         {0x00000070, 0x0480c018},
1456         {0x00000071, 0x00000040},
1457         {0x00000072, 0x01000000},
1458         {0x00000074, 0x000000ff},
1459         {0x00000075, 0x00143400},
1460         {0x00000076, 0x08ec0800},
1461         {0x00000077, 0x040000cc},
1462         {0x00000079, 0x00000000},
1463         {0x0000007a, 0x21000409},
1464         {0x0000007c, 0x00000000},
1465         {0x0000007d, 0xe8000000},
1466         {0x0000007e, 0x044408a8},
1467         {0x0000007f, 0x00000003},
1468         {0x00000080, 0x00000000},
1469         {0x00000081, 0x01000000},
1470         {0x00000082, 0x02000000},
1471         {0x00000083, 0x00000000},
1472         {0x00000084, 0xe3f3e4f4},
1473         {0x00000085, 0x00052024},
1474         {0x00000087, 0x00000000},
1475         {0x00000088, 0x66036603},
1476         {0x00000089, 0x01000000},
1477         {0x0000008b, 0x1c0a0000},
1478         {0x0000008c, 0xff010000},
1479         {0x0000008e, 0xffffefff},
1480         {0x0000008f, 0xfff3efff},
1481         {0x00000090, 0xfff3efbf},
1482         {0x00000094, 0x00101101},
1483         {0x00000095, 0x00000fff},
1484         {0x00000096, 0x00116fff},
1485         {0x00000097, 0x60010000},
1486         {0x00000098, 0x10010000},
1487         {0x00000099, 0x00006000},
1488         {0x0000009a, 0x00001000},
1489         {0x0000009f, 0x00a37400}
1490 };
1491
1492 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1493         {0x0000006f, 0x03044000},
1494         {0x00000070, 0x0480c018},
1495         {0x00000071, 0x00000040},
1496         {0x00000072, 0x01000000},
1497         {0x00000074, 0x000000ff},
1498         {0x00000075, 0x00143400},
1499         {0x00000076, 0x08ec0800},
1500         {0x00000077, 0x040000cc},
1501         {0x00000079, 0x00000000},
1502         {0x0000007a, 0x21000409},
1503         {0x0000007c, 0x00000000},
1504         {0x0000007d, 0xe8000000},
1505         {0x0000007e, 0x044408a8},
1506         {0x0000007f, 0x00000003},
1507         {0x00000080, 0x00000000},
1508         {0x00000081, 0x01000000},
1509         {0x00000082, 0x02000000},
1510         {0x00000083, 0x00000000},
1511         {0x00000084, 0xe3f3e4f4},
1512         {0x00000085, 0x00052024},
1513         {0x00000087, 0x00000000},
1514         {0x00000088, 0x66036603},
1515         {0x00000089, 0x01000000},
1516         {0x0000008b, 0x1c0a0000},
1517         {0x0000008c, 0xff010000},
1518         {0x0000008e, 0xffffefff},
1519         {0x0000008f, 0xfff3efff},
1520         {0x00000090, 0xfff3efbf},
1521         {0x00000094, 0x00101101},
1522         {0x00000095, 0x00000fff},
1523         {0x00000096, 0x00116fff},
1524         {0x00000097, 0x60010000},
1525         {0x00000098, 0x10010000},
1526         {0x00000099, 0x00006000},
1527         {0x0000009a, 0x00001000},
1528         {0x0000009f, 0x00a17730}
1529 };
1530
1531 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1532         {0x0000006f, 0x03044000},
1533         {0x00000070, 0x0480c018},
1534         {0x00000071, 0x00000040},
1535         {0x00000072, 0x01000000},
1536         {0x00000074, 0x000000ff},
1537         {0x00000075, 0x00143400},
1538         {0x00000076, 0x08ec0800},
1539         {0x00000077, 0x040000cc},
1540         {0x00000079, 0x00000000},
1541         {0x0000007a, 0x21000409},
1542         {0x0000007c, 0x00000000},
1543         {0x0000007d, 0xe8000000},
1544         {0x0000007e, 0x044408a8},
1545         {0x0000007f, 0x00000003},
1546         {0x00000080, 0x00000000},
1547         {0x00000081, 0x01000000},
1548         {0x00000082, 0x02000000},
1549         {0x00000083, 0x00000000},
1550         {0x00000084, 0xe3f3e4f4},
1551         {0x00000085, 0x00052024},
1552         {0x00000087, 0x00000000},
1553         {0x00000088, 0x66036603},
1554         {0x00000089, 0x01000000},
1555         {0x0000008b, 0x1c0a0000},
1556         {0x0000008c, 0xff010000},
1557         {0x0000008e, 0xffffefff},
1558         {0x0000008f, 0xfff3efff},
1559         {0x00000090, 0xfff3efbf},
1560         {0x00000094, 0x00101101},
1561         {0x00000095, 0x00000fff},
1562         {0x00000096, 0x00116fff},
1563         {0x00000097, 0x60010000},
1564         {0x00000098, 0x10010000},
1565         {0x00000099, 0x00006000},
1566         {0x0000009a, 0x00001000},
1567         {0x0000009f, 0x00a07730}
1568 };
1569
1570 /* ucode loading */
1571 int si_mc_load_microcode(struct radeon_device *rdev)
1572 {
1573         const __be32 *fw_data = NULL;
1574         const __le32 *new_fw_data = NULL;
1575         u32 running;
1576         u32 *io_mc_regs = NULL;
1577         const __le32 *new_io_mc_regs = NULL;
1578         int i, regs_size, ucode_size;
1579
1580         if (!rdev->mc_fw)
1581                 return -EINVAL;
1582
1583         if (rdev->new_fw) {
1584                 const struct mc_firmware_header_v1_0 *hdr =
1585                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1586
1587                 radeon_ucode_print_mc_hdr(&hdr->header);
1588                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1589                 new_io_mc_regs = (const __le32 *)
1590                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1591                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1592                 new_fw_data = (const __le32 *)
1593                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1594         } else {
1595                 ucode_size = rdev->mc_fw->size / 4;
1596
1597                 switch (rdev->family) {
1598                 case CHIP_TAHITI:
1599                         io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1600                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1601                         break;
1602                 case CHIP_PITCAIRN:
1603                         io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1604                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1605                         break;
1606                 case CHIP_VERDE:
1607                 default:
1608                         io_mc_regs = (u32 *)&verde_io_mc_regs;
1609                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1610                         break;
1611                 case CHIP_OLAND:
1612                         io_mc_regs = (u32 *)&oland_io_mc_regs;
1613                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1614                         break;
1615                 case CHIP_HAINAN:
1616                         io_mc_regs = (u32 *)&hainan_io_mc_regs;
1617                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1618                         break;
1619                 }
1620                 fw_data = (const __be32 *)rdev->mc_fw->data;
1621         }
1622
1623         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1624
1625         if (running == 0) {
1626                 /* reset the engine and set to writable */
1627                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1628                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1629
1630                 /* load mc io regs */
1631                 for (i = 0; i < regs_size; i++) {
1632                         if (rdev->new_fw) {
1633                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1634                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1635                         } else {
1636                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1637                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1638                         }
1639                 }
1640                 /* load the MC ucode */
1641                 for (i = 0; i < ucode_size; i++) {
1642                         if (rdev->new_fw)
1643                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1644                         else
1645                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1646                 }
1647
1648                 /* put the engine back into the active state */
1649                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1650                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1651                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1652
1653                 /* wait for training to complete */
1654                 for (i = 0; i < rdev->usec_timeout; i++) {
1655                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1656                                 break;
1657                         udelay(1);
1658                 }
1659                 for (i = 0; i < rdev->usec_timeout; i++) {
1660                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1661                                 break;
1662                         udelay(1);
1663                 }
1664         }
1665
1666         return 0;
1667 }
1668
1669 static int si_init_microcode(struct radeon_device *rdev)
1670 {
1671         const char *chip_name;
1672         const char *new_chip_name;
1673         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1674         size_t smc_req_size, mc2_req_size;
1675         char fw_name[30];
1676         int err;
1677         int new_fw = 0;
1678         bool new_smc = false;
1679         bool si58_fw = false;
1680         bool banks2_fw = false;
1681
1682         DRM_DEBUG("\n");
1683
1684         switch (rdev->family) {
1685         case CHIP_TAHITI:
1686                 chip_name = "TAHITI";
1687                 new_chip_name = "tahiti";
1688                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1689                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1690                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1691                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1692                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1693                 mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1694                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1695                 break;
1696         case CHIP_PITCAIRN:
1697                 chip_name = "PITCAIRN";
1698                 if ((rdev->pdev->revision == 0x81) &&
1699                     ((rdev->pdev->device == 0x6810) ||
1700                      (rdev->pdev->device == 0x6811)))
1701                         new_smc = true;
1702                 new_chip_name = "pitcairn";
1703                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1704                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1705                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1706                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1707                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1708                 mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1709                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1710                 break;
1711         case CHIP_VERDE:
1712                 chip_name = "VERDE";
1713                 if (((rdev->pdev->device == 0x6820) &&
1714                      ((rdev->pdev->revision == 0x81) ||
1715                       (rdev->pdev->revision == 0x83))) ||
1716                     ((rdev->pdev->device == 0x6821) &&
1717                      ((rdev->pdev->revision == 0x83) ||
1718                       (rdev->pdev->revision == 0x87))) ||
1719                     ((rdev->pdev->revision == 0x87) &&
1720                      ((rdev->pdev->device == 0x6823) ||
1721                       (rdev->pdev->device == 0x682b))))
1722                         new_smc = true;
1723                 new_chip_name = "verde";
1724                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1725                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1726                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1727                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1728                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1729                 mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1730                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1731                 break;
1732         case CHIP_OLAND:
1733                 chip_name = "OLAND";
1734                 if (((rdev->pdev->revision == 0x81) &&
1735                      ((rdev->pdev->device == 0x6600) ||
1736                       (rdev->pdev->device == 0x6604) ||
1737                       (rdev->pdev->device == 0x6605) ||
1738                       (rdev->pdev->device == 0x6610))) ||
1739                     ((rdev->pdev->revision == 0x83) &&
1740                      (rdev->pdev->device == 0x6610)))
1741                         new_smc = true;
1742                 new_chip_name = "oland";
1743                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1744                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1745                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1746                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1747                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1748                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1749                 break;
1750         case CHIP_HAINAN:
1751                 chip_name = "HAINAN";
1752                 if (((rdev->pdev->revision == 0x81) &&
1753                      (rdev->pdev->device == 0x6660)) ||
1754                     ((rdev->pdev->revision == 0x83) &&
1755                      ((rdev->pdev->device == 0x6660) ||
1756                       (rdev->pdev->device == 0x6663) ||
1757                       (rdev->pdev->device == 0x6665) ||
1758                       (rdev->pdev->device == 0x6667))))
1759                         new_smc = true;
1760                 else if ((rdev->pdev->revision == 0xc3) &&
1761                          (rdev->pdev->device == 0x6665))
1762                         banks2_fw = true;
1763                 new_chip_name = "hainan";
1764                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1765                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1766                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1767                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1768                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1769                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1770                 break;
1771         default: BUG();
1772         }
1773
1774         /* this memory configuration requires special firmware */
1775         if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1776                 si58_fw = true;
1777
1778         DRM_INFO("Loading %s Microcode\n", new_chip_name);
1779
1780         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1781         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1782         if (err) {
1783                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1784                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1785                 if (err)
1786                         goto out;
1787                 if (rdev->pfp_fw->size != pfp_req_size) {
1788                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1789                                rdev->pfp_fw->size, fw_name);
1790                         err = -EINVAL;
1791                         goto out;
1792                 }
1793         } else {
1794                 err = radeon_ucode_validate(rdev->pfp_fw);
1795                 if (err) {
1796                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1797                                fw_name);
1798                         goto out;
1799                 } else {
1800                         new_fw++;
1801                 }
1802         }
1803
1804         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1805         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1806         if (err) {
1807                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1808                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1809                 if (err)
1810                         goto out;
1811                 if (rdev->me_fw->size != me_req_size) {
1812                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1813                                rdev->me_fw->size, fw_name);
1814                         err = -EINVAL;
1815                 }
1816         } else {
1817                 err = radeon_ucode_validate(rdev->me_fw);
1818                 if (err) {
1819                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1820                                fw_name);
1821                         goto out;
1822                 } else {
1823                         new_fw++;
1824                 }
1825         }
1826
1827         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1828         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1829         if (err) {
1830                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1831                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1832                 if (err)
1833                         goto out;
1834                 if (rdev->ce_fw->size != ce_req_size) {
1835                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1836                                rdev->ce_fw->size, fw_name);
1837                         err = -EINVAL;
1838                 }
1839         } else {
1840                 err = radeon_ucode_validate(rdev->ce_fw);
1841                 if (err) {
1842                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1843                                fw_name);
1844                         goto out;
1845                 } else {
1846                         new_fw++;
1847                 }
1848         }
1849
1850         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1851         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1852         if (err) {
1853                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1854                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1855                 if (err)
1856                         goto out;
1857                 if (rdev->rlc_fw->size != rlc_req_size) {
1858                         pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1859                                rdev->rlc_fw->size, fw_name);
1860                         err = -EINVAL;
1861                 }
1862         } else {
1863                 err = radeon_ucode_validate(rdev->rlc_fw);
1864                 if (err) {
1865                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1866                                fw_name);
1867                         goto out;
1868                 } else {
1869                         new_fw++;
1870                 }
1871         }
1872
1873         if (si58_fw)
1874                 snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1875         else
1876                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1877         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1878         if (err) {
1879                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1880                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1881                 if (err) {
1882                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1883                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1884                         if (err)
1885                                 goto out;
1886                 }
1887                 if ((rdev->mc_fw->size != mc_req_size) &&
1888                     (rdev->mc_fw->size != mc2_req_size)) {
1889                         pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1890                                rdev->mc_fw->size, fw_name);
1891                         err = -EINVAL;
1892                 }
1893                 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1894         } else {
1895                 err = radeon_ucode_validate(rdev->mc_fw);
1896                 if (err) {
1897                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1898                                fw_name);
1899                         goto out;
1900                 } else {
1901                         new_fw++;
1902                 }
1903         }
1904
1905         if (banks2_fw)
1906                 snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1907         else if (new_smc)
1908                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1909         else
1910                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1911         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1912         if (err) {
1913                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1914                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1915                 if (err) {
1916                         pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1917                         release_firmware(rdev->smc_fw);
1918                         rdev->smc_fw = NULL;
1919                         err = 0;
1920                 } else if (rdev->smc_fw->size != smc_req_size) {
1921                         pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1922                                rdev->smc_fw->size, fw_name);
1923                         err = -EINVAL;
1924                 }
1925         } else {
1926                 err = radeon_ucode_validate(rdev->smc_fw);
1927                 if (err) {
1928                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1929                                fw_name);
1930                         goto out;
1931                 } else {
1932                         new_fw++;
1933                 }
1934         }
1935
1936         if (new_fw == 0) {
1937                 rdev->new_fw = false;
1938         } else if (new_fw < 6) {
1939                 pr_err("si_fw: mixing new and old firmware!\n");
1940                 err = -EINVAL;
1941         } else {
1942                 rdev->new_fw = true;
1943         }
1944 out:
1945         if (err) {
1946                 if (err != -EINVAL)
1947                         pr_err("si_cp: Failed to load firmware \"%s\"\n",
1948                                fw_name);
1949                 release_firmware(rdev->pfp_fw);
1950                 rdev->pfp_fw = NULL;
1951                 release_firmware(rdev->me_fw);
1952                 rdev->me_fw = NULL;
1953                 release_firmware(rdev->ce_fw);
1954                 rdev->ce_fw = NULL;
1955                 release_firmware(rdev->rlc_fw);
1956                 rdev->rlc_fw = NULL;
1957                 release_firmware(rdev->mc_fw);
1958                 rdev->mc_fw = NULL;
1959                 release_firmware(rdev->smc_fw);
1960                 rdev->smc_fw = NULL;
1961         }
1962         return err;
1963 }
1964
1965 /* watermark setup */
1966 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1967                                    struct radeon_crtc *radeon_crtc,
1968                                    struct drm_display_mode *mode,
1969                                    struct drm_display_mode *other_mode)
1970 {
1971         u32 tmp, buffer_alloc, i;
1972         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1973         /*
1974          * Line Buffer Setup
1975          * There are 3 line buffers, each one shared by 2 display controllers.
1976          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1977          * the display controllers.  The paritioning is done via one of four
1978          * preset allocations specified in bits 21:20:
1979          *  0 - half lb
1980          *  2 - whole lb, other crtc must be disabled
1981          */
1982         /* this can get tricky if we have two large displays on a paired group
1983          * of crtcs.  Ideally for multiple large displays we'd assign them to
1984          * non-linked crtcs for maximum line buffer allocation.
1985          */
1986         if (radeon_crtc->base.enabled && mode) {
1987                 if (other_mode) {
1988                         tmp = 0; /* 1/2 */
1989                         buffer_alloc = 1;
1990                 } else {
1991                         tmp = 2; /* whole */
1992                         buffer_alloc = 2;
1993                 }
1994         } else {
1995                 tmp = 0;
1996                 buffer_alloc = 0;
1997         }
1998
1999         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
2000                DC_LB_MEMORY_CONFIG(tmp));
2001
2002         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
2003                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
2004         for (i = 0; i < rdev->usec_timeout; i++) {
2005                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
2006                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
2007                         break;
2008                 udelay(1);
2009         }
2010
2011         if (radeon_crtc->base.enabled && mode) {
2012                 switch (tmp) {
2013                 case 0:
2014                 default:
2015                         return 4096 * 2;
2016                 case 2:
2017                         return 8192 * 2;
2018                 }
2019         }
2020
2021         /* controller not enabled, so no lb used */
2022         return 0;
2023 }
2024
2025 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2026 {
2027         u32 tmp = RREG32(MC_SHARED_CHMAP);
2028
2029         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2030         case 0:
2031         default:
2032                 return 1;
2033         case 1:
2034                 return 2;
2035         case 2:
2036                 return 4;
2037         case 3:
2038                 return 8;
2039         case 4:
2040                 return 3;
2041         case 5:
2042                 return 6;
2043         case 6:
2044                 return 10;
2045         case 7:
2046                 return 12;
2047         case 8:
2048                 return 16;
2049         }
2050 }
2051
2052 struct dce6_wm_params {
2053         u32 dram_channels; /* number of dram channels */
2054         u32 yclk;          /* bandwidth per dram data pin in kHz */
2055         u32 sclk;          /* engine clock in kHz */
2056         u32 disp_clk;      /* display clock in kHz */
2057         u32 src_width;     /* viewport width */
2058         u32 active_time;   /* active display time in ns */
2059         u32 blank_time;    /* blank time in ns */
2060         bool interlaced;    /* mode is interlaced */
2061         fixed20_12 vsc;    /* vertical scale ratio */
2062         u32 num_heads;     /* number of active crtcs */
2063         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2064         u32 lb_size;       /* line buffer allocated to pipe */
2065         u32 vtaps;         /* vertical scaler taps */
2066 };
2067
2068 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2069 {
2070         /* Calculate raw DRAM Bandwidth */
2071         fixed20_12 dram_efficiency; /* 0.7 */
2072         fixed20_12 yclk, dram_channels, bandwidth;
2073         fixed20_12 a;
2074
2075         a.full = dfixed_const(1000);
2076         yclk.full = dfixed_const(wm->yclk);
2077         yclk.full = dfixed_div(yclk, a);
2078         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2079         a.full = dfixed_const(10);
2080         dram_efficiency.full = dfixed_const(7);
2081         dram_efficiency.full = dfixed_div(dram_efficiency, a);
2082         bandwidth.full = dfixed_mul(dram_channels, yclk);
2083         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2084
2085         return dfixed_trunc(bandwidth);
2086 }
2087
2088 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2089 {
2090         /* Calculate DRAM Bandwidth and the part allocated to display. */
2091         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2092         fixed20_12 yclk, dram_channels, bandwidth;
2093         fixed20_12 a;
2094
2095         a.full = dfixed_const(1000);
2096         yclk.full = dfixed_const(wm->yclk);
2097         yclk.full = dfixed_div(yclk, a);
2098         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2099         a.full = dfixed_const(10);
2100         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2101         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2102         bandwidth.full = dfixed_mul(dram_channels, yclk);
2103         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2104
2105         return dfixed_trunc(bandwidth);
2106 }
2107
2108 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2109 {
2110         /* Calculate the display Data return Bandwidth */
2111         fixed20_12 return_efficiency; /* 0.8 */
2112         fixed20_12 sclk, bandwidth;
2113         fixed20_12 a;
2114
2115         a.full = dfixed_const(1000);
2116         sclk.full = dfixed_const(wm->sclk);
2117         sclk.full = dfixed_div(sclk, a);
2118         a.full = dfixed_const(10);
2119         return_efficiency.full = dfixed_const(8);
2120         return_efficiency.full = dfixed_div(return_efficiency, a);
2121         a.full = dfixed_const(32);
2122         bandwidth.full = dfixed_mul(a, sclk);
2123         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2124
2125         return dfixed_trunc(bandwidth);
2126 }
2127
2128 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2129 {
2130         return 32;
2131 }
2132
2133 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2134 {
2135         /* Calculate the DMIF Request Bandwidth */
2136         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2137         fixed20_12 disp_clk, sclk, bandwidth;
2138         fixed20_12 a, b1, b2;
2139         u32 min_bandwidth;
2140
2141         a.full = dfixed_const(1000);
2142         disp_clk.full = dfixed_const(wm->disp_clk);
2143         disp_clk.full = dfixed_div(disp_clk, a);
2144         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2145         b1.full = dfixed_mul(a, disp_clk);
2146
2147         a.full = dfixed_const(1000);
2148         sclk.full = dfixed_const(wm->sclk);
2149         sclk.full = dfixed_div(sclk, a);
2150         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2151         b2.full = dfixed_mul(a, sclk);
2152
2153         a.full = dfixed_const(10);
2154         disp_clk_request_efficiency.full = dfixed_const(8);
2155         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2156
2157         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2158
2159         a.full = dfixed_const(min_bandwidth);
2160         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2161
2162         return dfixed_trunc(bandwidth);
2163 }
2164
2165 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2166 {
2167         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2168         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2169         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2170         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2171
2172         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2173 }
2174
2175 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2176 {
2177         /* Calculate the display mode Average Bandwidth
2178          * DisplayMode should contain the source and destination dimensions,
2179          * timing, etc.
2180          */
2181         fixed20_12 bpp;
2182         fixed20_12 line_time;
2183         fixed20_12 src_width;
2184         fixed20_12 bandwidth;
2185         fixed20_12 a;
2186
2187         a.full = dfixed_const(1000);
2188         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2189         line_time.full = dfixed_div(line_time, a);
2190         bpp.full = dfixed_const(wm->bytes_per_pixel);
2191         src_width.full = dfixed_const(wm->src_width);
2192         bandwidth.full = dfixed_mul(src_width, bpp);
2193         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2194         bandwidth.full = dfixed_div(bandwidth, line_time);
2195
2196         return dfixed_trunc(bandwidth);
2197 }
2198
2199 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2200 {
2201         /* First calcualte the latency in ns */
2202         u32 mc_latency = 2000; /* 2000 ns. */
2203         u32 available_bandwidth = dce6_available_bandwidth(wm);
2204         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2205         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2206         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2207         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2208                 (wm->num_heads * cursor_line_pair_return_time);
2209         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2210         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2211         u32 tmp, dmif_size = 12288;
2212         fixed20_12 a, b, c;
2213
2214         if (wm->num_heads == 0)
2215                 return 0;
2216
2217         a.full = dfixed_const(2);
2218         b.full = dfixed_const(1);
2219         if ((wm->vsc.full > a.full) ||
2220             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2221             (wm->vtaps >= 5) ||
2222             ((wm->vsc.full >= a.full) && wm->interlaced))
2223                 max_src_lines_per_dst_line = 4;
2224         else
2225                 max_src_lines_per_dst_line = 2;
2226
2227         a.full = dfixed_const(available_bandwidth);
2228         b.full = dfixed_const(wm->num_heads);
2229         a.full = dfixed_div(a, b);
2230         tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2231         tmp = min(dfixed_trunc(a), tmp);
2232
2233         lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2234
2235         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2236         b.full = dfixed_const(1000);
2237         c.full = dfixed_const(lb_fill_bw);
2238         b.full = dfixed_div(c, b);
2239         a.full = dfixed_div(a, b);
2240         line_fill_time = dfixed_trunc(a);
2241
2242         if (line_fill_time < wm->active_time)
2243                 return latency;
2244         else
2245                 return latency + (line_fill_time - wm->active_time);
2246
2247 }
2248
2249 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2250 {
2251         if (dce6_average_bandwidth(wm) <=
2252             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2253                 return true;
2254         else
2255                 return false;
2256 };
2257
2258 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2259 {
2260         if (dce6_average_bandwidth(wm) <=
2261             (dce6_available_bandwidth(wm) / wm->num_heads))
2262                 return true;
2263         else
2264                 return false;
2265 };
2266
2267 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2268 {
2269         u32 lb_partitions = wm->lb_size / wm->src_width;
2270         u32 line_time = wm->active_time + wm->blank_time;
2271         u32 latency_tolerant_lines;
2272         u32 latency_hiding;
2273         fixed20_12 a;
2274
2275         a.full = dfixed_const(1);
2276         if (wm->vsc.full > a.full)
2277                 latency_tolerant_lines = 1;
2278         else {
2279                 if (lb_partitions <= (wm->vtaps + 1))
2280                         latency_tolerant_lines = 1;
2281                 else
2282                         latency_tolerant_lines = 2;
2283         }
2284
2285         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2286
2287         if (dce6_latency_watermark(wm) <= latency_hiding)
2288                 return true;
2289         else
2290                 return false;
2291 }
2292
2293 static void dce6_program_watermarks(struct radeon_device *rdev,
2294                                          struct radeon_crtc *radeon_crtc,
2295                                          u32 lb_size, u32 num_heads)
2296 {
2297         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2298         struct dce6_wm_params wm_low, wm_high;
2299         u32 dram_channels;
2300         u32 active_time;
2301         u32 line_time = 0;
2302         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2303         u32 priority_a_mark = 0, priority_b_mark = 0;
2304         u32 priority_a_cnt = PRIORITY_OFF;
2305         u32 priority_b_cnt = PRIORITY_OFF;
2306         u32 tmp, arb_control3;
2307         fixed20_12 a, b, c;
2308
2309         if (radeon_crtc->base.enabled && num_heads && mode) {
2310                 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2311                                             (u32)mode->clock);
2312                 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2313                                           (u32)mode->clock);
2314                 line_time = min(line_time, (u32)65535);
2315                 priority_a_cnt = 0;
2316                 priority_b_cnt = 0;
2317
2318                 if (rdev->family == CHIP_ARUBA)
2319                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2320                 else
2321                         dram_channels = si_get_number_of_dram_channels(rdev);
2322
2323                 /* watermark for high clocks */
2324                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2325                         wm_high.yclk =
2326                                 radeon_dpm_get_mclk(rdev, false) * 10;
2327                         wm_high.sclk =
2328                                 radeon_dpm_get_sclk(rdev, false) * 10;
2329                 } else {
2330                         wm_high.yclk = rdev->pm.current_mclk * 10;
2331                         wm_high.sclk = rdev->pm.current_sclk * 10;
2332                 }
2333
2334                 wm_high.disp_clk = mode->clock;
2335                 wm_high.src_width = mode->crtc_hdisplay;
2336                 wm_high.active_time = active_time;
2337                 wm_high.blank_time = line_time - wm_high.active_time;
2338                 wm_high.interlaced = false;
2339                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2340                         wm_high.interlaced = true;
2341                 wm_high.vsc = radeon_crtc->vsc;
2342                 wm_high.vtaps = 1;
2343                 if (radeon_crtc->rmx_type != RMX_OFF)
2344                         wm_high.vtaps = 2;
2345                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2346                 wm_high.lb_size = lb_size;
2347                 wm_high.dram_channels = dram_channels;
2348                 wm_high.num_heads = num_heads;
2349
2350                 /* watermark for low clocks */
2351                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2352                         wm_low.yclk =
2353                                 radeon_dpm_get_mclk(rdev, true) * 10;
2354                         wm_low.sclk =
2355                                 radeon_dpm_get_sclk(rdev, true) * 10;
2356                 } else {
2357                         wm_low.yclk = rdev->pm.current_mclk * 10;
2358                         wm_low.sclk = rdev->pm.current_sclk * 10;
2359                 }
2360
2361                 wm_low.disp_clk = mode->clock;
2362                 wm_low.src_width = mode->crtc_hdisplay;
2363                 wm_low.active_time = active_time;
2364                 wm_low.blank_time = line_time - wm_low.active_time;
2365                 wm_low.interlaced = false;
2366                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2367                         wm_low.interlaced = true;
2368                 wm_low.vsc = radeon_crtc->vsc;
2369                 wm_low.vtaps = 1;
2370                 if (radeon_crtc->rmx_type != RMX_OFF)
2371                         wm_low.vtaps = 2;
2372                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2373                 wm_low.lb_size = lb_size;
2374                 wm_low.dram_channels = dram_channels;
2375                 wm_low.num_heads = num_heads;
2376
2377                 /* set for high clocks */
2378                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2379                 /* set for low clocks */
2380                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2381
2382                 /* possibly force display priority to high */
2383                 /* should really do this at mode validation time... */
2384                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2385                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2386                     !dce6_check_latency_hiding(&wm_high) ||
2387                     (rdev->disp_priority == 2)) {
2388                         DRM_DEBUG_KMS("force priority to high\n");
2389                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2390                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2391                 }
2392                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2393                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2394                     !dce6_check_latency_hiding(&wm_low) ||
2395                     (rdev->disp_priority == 2)) {
2396                         DRM_DEBUG_KMS("force priority to high\n");
2397                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2398                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2399                 }
2400
2401                 a.full = dfixed_const(1000);
2402                 b.full = dfixed_const(mode->clock);
2403                 b.full = dfixed_div(b, a);
2404                 c.full = dfixed_const(latency_watermark_a);
2405                 c.full = dfixed_mul(c, b);
2406                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2407                 c.full = dfixed_div(c, a);
2408                 a.full = dfixed_const(16);
2409                 c.full = dfixed_div(c, a);
2410                 priority_a_mark = dfixed_trunc(c);
2411                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2412
2413                 a.full = dfixed_const(1000);
2414                 b.full = dfixed_const(mode->clock);
2415                 b.full = dfixed_div(b, a);
2416                 c.full = dfixed_const(latency_watermark_b);
2417                 c.full = dfixed_mul(c, b);
2418                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2419                 c.full = dfixed_div(c, a);
2420                 a.full = dfixed_const(16);
2421                 c.full = dfixed_div(c, a);
2422                 priority_b_mark = dfixed_trunc(c);
2423                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2424
2425                 /* Save number of lines the linebuffer leads before the scanout */
2426                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2427         }
2428
2429         /* select wm A */
2430         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2431         tmp = arb_control3;
2432         tmp &= ~LATENCY_WATERMARK_MASK(3);
2433         tmp |= LATENCY_WATERMARK_MASK(1);
2434         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2435         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2436                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2437                 LATENCY_HIGH_WATERMARK(line_time)));
2438         /* select wm B */
2439         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2440         tmp &= ~LATENCY_WATERMARK_MASK(3);
2441         tmp |= LATENCY_WATERMARK_MASK(2);
2442         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2443         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2444                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2445                 LATENCY_HIGH_WATERMARK(line_time)));
2446         /* restore original selection */
2447         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2448
2449         /* write the priority marks */
2450         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2451         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2452
2453         /* save values for DPM */
2454         radeon_crtc->line_time = line_time;
2455         radeon_crtc->wm_high = latency_watermark_a;
2456         radeon_crtc->wm_low = latency_watermark_b;
2457 }
2458
2459 void dce6_bandwidth_update(struct radeon_device *rdev)
2460 {
2461         struct drm_display_mode *mode0 = NULL;
2462         struct drm_display_mode *mode1 = NULL;
2463         u32 num_heads = 0, lb_size;
2464         int i;
2465
2466         if (!rdev->mode_info.mode_config_initialized)
2467                 return;
2468
2469         radeon_update_display_priority(rdev);
2470
2471         for (i = 0; i < rdev->num_crtc; i++) {
2472                 if (rdev->mode_info.crtcs[i]->base.enabled)
2473                         num_heads++;
2474         }
2475         for (i = 0; i < rdev->num_crtc; i += 2) {
2476                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2477                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2478                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2479                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2480                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2481                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2482         }
2483 }
2484
2485 /*
2486  * Core functions
2487  */
2488 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2489 {
2490         u32 *tile = rdev->config.si.tile_mode_array;
2491         const u32 num_tile_mode_states =
2492                         ARRAY_SIZE(rdev->config.si.tile_mode_array);
2493         u32 reg_offset, split_equal_to_row_size;
2494
2495         switch (rdev->config.si.mem_row_size_in_kb) {
2496         case 1:
2497                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2498                 break;
2499         case 2:
2500         default:
2501                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2502                 break;
2503         case 4:
2504                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2505                 break;
2506         }
2507
2508         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2509                 tile[reg_offset] = 0;
2510
2511         switch(rdev->family) {
2512         case CHIP_TAHITI:
2513         case CHIP_PITCAIRN:
2514                 /* non-AA compressed depth or any compressed stencil */
2515                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2518                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2519                            NUM_BANKS(ADDR_SURF_16_BANK) |
2520                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2523                 /* 2xAA/4xAA compressed depth only */
2524                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2526                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2527                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2528                            NUM_BANKS(ADDR_SURF_16_BANK) |
2529                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2531                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2532                 /* 8xAA compressed depth only */
2533                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2535                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2536                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2537                            NUM_BANKS(ADDR_SURF_16_BANK) |
2538                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2541                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2542                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2544                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2545                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2546                            NUM_BANKS(ADDR_SURF_16_BANK) |
2547                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2549                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2550                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2551                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2552                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2554                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2555                            NUM_BANKS(ADDR_SURF_16_BANK) |
2556                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2559                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2560                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2562                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2563                            TILE_SPLIT(split_equal_to_row_size) |
2564                            NUM_BANKS(ADDR_SURF_16_BANK) |
2565                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2567                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2568                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2569                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2570                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2571                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2572                            TILE_SPLIT(split_equal_to_row_size) |
2573                            NUM_BANKS(ADDR_SURF_16_BANK) |
2574                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2576                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2577                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2578                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2580                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2581                            TILE_SPLIT(split_equal_to_row_size) |
2582                            NUM_BANKS(ADDR_SURF_16_BANK) |
2583                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2585                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2586                 /* 1D and 1D Array Surfaces */
2587                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2588                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2589                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2590                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2591                            NUM_BANKS(ADDR_SURF_16_BANK) |
2592                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2595                 /* Displayable maps. */
2596                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2597                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2598                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2599                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2600                            NUM_BANKS(ADDR_SURF_16_BANK) |
2601                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2603                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2604                 /* Display 8bpp. */
2605                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2608                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2609                            NUM_BANKS(ADDR_SURF_16_BANK) |
2610                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2613                 /* Display 16bpp. */
2614                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2617                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2618                            NUM_BANKS(ADDR_SURF_16_BANK) |
2619                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2621                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2622                 /* Display 32bpp. */
2623                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2625                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2627                            NUM_BANKS(ADDR_SURF_16_BANK) |
2628                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2631                 /* Thin. */
2632                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2633                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2634                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2635                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2636                            NUM_BANKS(ADDR_SURF_16_BANK) |
2637                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2639                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2640                 /* Thin 8 bpp. */
2641                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2643                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2644                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645                            NUM_BANKS(ADDR_SURF_16_BANK) |
2646                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2648                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2649                 /* Thin 16 bpp. */
2650                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2652                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2653                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2654                            NUM_BANKS(ADDR_SURF_16_BANK) |
2655                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2657                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2658                 /* Thin 32 bpp. */
2659                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2661                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2663                            NUM_BANKS(ADDR_SURF_16_BANK) |
2664                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2666                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2667                 /* Thin 64 bpp. */
2668                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2670                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2671                            TILE_SPLIT(split_equal_to_row_size) |
2672                            NUM_BANKS(ADDR_SURF_16_BANK) |
2673                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2675                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2676                 /* 8 bpp PRT. */
2677                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2679                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2680                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2681                            NUM_BANKS(ADDR_SURF_16_BANK) |
2682                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2683                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2684                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2685                 /* 16 bpp PRT */
2686                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2688                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2689                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2690                            NUM_BANKS(ADDR_SURF_16_BANK) |
2691                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2693                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2694                 /* 32 bpp PRT */
2695                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2697                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2698                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2699                            NUM_BANKS(ADDR_SURF_16_BANK) |
2700                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2702                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2703                 /* 64 bpp PRT */
2704                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2706                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2707                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2708                            NUM_BANKS(ADDR_SURF_16_BANK) |
2709                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2712                 /* 128 bpp PRT */
2713                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2715                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2716                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2717                            NUM_BANKS(ADDR_SURF_8_BANK) |
2718                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2720                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2721
2722                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2723                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2724                 break;
2725
2726         case CHIP_VERDE:
2727         case CHIP_OLAND:
2728         case CHIP_HAINAN:
2729                 /* non-AA compressed depth or any compressed stencil */
2730                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2731                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2732                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2733                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2734                            NUM_BANKS(ADDR_SURF_16_BANK) |
2735                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2737                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2738                 /* 2xAA/4xAA compressed depth only */
2739                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2741                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2742                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2743                            NUM_BANKS(ADDR_SURF_16_BANK) |
2744                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2747                 /* 8xAA compressed depth only */
2748                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2750                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2752                            NUM_BANKS(ADDR_SURF_16_BANK) |
2753                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2755                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2756                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2757                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2759                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2760                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2761                            NUM_BANKS(ADDR_SURF_16_BANK) |
2762                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2764                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2765                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2766                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2767                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2768                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2769                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2770                            NUM_BANKS(ADDR_SURF_16_BANK) |
2771                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2773                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2774                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2775                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2777                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778                            TILE_SPLIT(split_equal_to_row_size) |
2779                            NUM_BANKS(ADDR_SURF_16_BANK) |
2780                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2783                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2784                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2786                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                            TILE_SPLIT(split_equal_to_row_size) |
2788                            NUM_BANKS(ADDR_SURF_16_BANK) |
2789                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2791                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2792                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2793                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2794                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2795                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2796                            TILE_SPLIT(split_equal_to_row_size) |
2797                            NUM_BANKS(ADDR_SURF_16_BANK) |
2798                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2800                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2801                 /* 1D and 1D Array Surfaces */
2802                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2803                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2804                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2805                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2806                            NUM_BANKS(ADDR_SURF_16_BANK) |
2807                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2810                 /* Displayable maps. */
2811                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2815                            NUM_BANKS(ADDR_SURF_16_BANK) |
2816                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2818                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2819                 /* Display 8bpp. */
2820                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2822                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2823                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2824                            NUM_BANKS(ADDR_SURF_16_BANK) |
2825                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2827                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2828                 /* Display 16bpp. */
2829                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2832                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2833                            NUM_BANKS(ADDR_SURF_16_BANK) |
2834                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2836                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2837                 /* Display 32bpp. */
2838                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2841                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2842                            NUM_BANKS(ADDR_SURF_16_BANK) |
2843                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2845                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2846                 /* Thin. */
2847                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2849                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2851                            NUM_BANKS(ADDR_SURF_16_BANK) |
2852                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2854                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2855                 /* Thin 8 bpp. */
2856                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2857                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2858                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2859                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2860                            NUM_BANKS(ADDR_SURF_16_BANK) |
2861                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2863                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2864                 /* Thin 16 bpp. */
2865                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2867                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2868                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2869                            NUM_BANKS(ADDR_SURF_16_BANK) |
2870                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2872                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2873                 /* Thin 32 bpp. */
2874                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2875                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2876                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2877                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2878                            NUM_BANKS(ADDR_SURF_16_BANK) |
2879                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2881                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2882                 /* Thin 64 bpp. */
2883                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2885                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886                            TILE_SPLIT(split_equal_to_row_size) |
2887                            NUM_BANKS(ADDR_SURF_16_BANK) |
2888                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2889                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2890                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2891                 /* 8 bpp PRT. */
2892                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2894                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2895                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2896                            NUM_BANKS(ADDR_SURF_16_BANK) |
2897                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2898                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2899                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2900                 /* 16 bpp PRT */
2901                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2903                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2904                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2905                            NUM_BANKS(ADDR_SURF_16_BANK) |
2906                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2908                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2909                 /* 32 bpp PRT */
2910                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2912                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2913                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2914                            NUM_BANKS(ADDR_SURF_16_BANK) |
2915                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2916                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2917                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2918                 /* 64 bpp PRT */
2919                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2921                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2922                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2923                            NUM_BANKS(ADDR_SURF_16_BANK) |
2924                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2925                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2926                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2927                 /* 128 bpp PRT */
2928                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2930                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2931                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2932                            NUM_BANKS(ADDR_SURF_8_BANK) |
2933                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2935                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2936
2937                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2938                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2939                 break;
2940
2941         default:
2942                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2943         }
2944 }
2945
2946 static void si_select_se_sh(struct radeon_device *rdev,
2947                             u32 se_num, u32 sh_num)
2948 {
2949         u32 data = INSTANCE_BROADCAST_WRITES;
2950
2951         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2952                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2953         else if (se_num == 0xffffffff)
2954                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2955         else if (sh_num == 0xffffffff)
2956                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2957         else
2958                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2959         WREG32(GRBM_GFX_INDEX, data);
2960 }
2961
/*
 * si_create_bitmask - build a mask with the low @bit_width bits set
 *
 * @bit_width: number of low-order bits to set
 *
 * Returns 0 for a width of 0 and saturates to 0xffffffff for any width
 * of 32 or more.
 */
static u32 si_create_bitmask(u32 bit_width)
{
        /* shifting by the full register width is undefined, so saturate */
        if (bit_width >= 32)
                return 0xffffffff;

        return ((u32)1 << bit_width) - 1;
}
2972
2973 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2974 {
2975         u32 data, mask;
2976
2977         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2978         if (data & 1)
2979                 data &= INACTIVE_CUS_MASK;
2980         else
2981                 data = 0;
2982         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2983
2984         data >>= INACTIVE_CUS_SHIFT;
2985
2986         mask = si_create_bitmask(cu_per_sh);
2987
2988         return ~data & mask;
2989 }
2990
2991 static void si_setup_spi(struct radeon_device *rdev,
2992                          u32 se_num, u32 sh_per_se,
2993                          u32 cu_per_sh)
2994 {
2995         int i, j, k;
2996         u32 data, mask, active_cu;
2997
2998         for (i = 0; i < se_num; i++) {
2999                 for (j = 0; j < sh_per_se; j++) {
3000                         si_select_se_sh(rdev, i, j);
3001                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3002                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3003
3004                         mask = 1;
3005                         for (k = 0; k < 16; k++) {
3006                                 mask <<= k;
3007                                 if (active_cu & mask) {
3008                                         data &= ~mask;
3009                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3010                                         break;
3011                                 }
3012                         }
3013                 }
3014         }
3015         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3016 }
3017
3018 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3019                               u32 max_rb_num_per_se,
3020                               u32 sh_per_se)
3021 {
3022         u32 data, mask;
3023
3024         data = RREG32(CC_RB_BACKEND_DISABLE);
3025         if (data & 1)
3026                 data &= BACKEND_DISABLE_MASK;
3027         else
3028                 data = 0;
3029         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3030
3031         data >>= BACKEND_DISABLE_SHIFT;
3032
3033         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3034
3035         return data & mask;
3036 }
3037
3038 static void si_setup_rb(struct radeon_device *rdev,
3039                         u32 se_num, u32 sh_per_se,
3040                         u32 max_rb_num_per_se)
3041 {
3042         int i, j;
3043         u32 data, mask;
3044         u32 disabled_rbs = 0;
3045         u32 enabled_rbs = 0;
3046
3047         for (i = 0; i < se_num; i++) {
3048                 for (j = 0; j < sh_per_se; j++) {
3049                         si_select_se_sh(rdev, i, j);
3050                         data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3051                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3052                 }
3053         }
3054         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3055
3056         mask = 1;
3057         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3058                 if (!(disabled_rbs & mask))
3059                         enabled_rbs |= mask;
3060                 mask <<= 1;
3061         }
3062
3063         rdev->config.si.backend_enable_mask = enabled_rbs;
3064
3065         for (i = 0; i < se_num; i++) {
3066                 si_select_se_sh(rdev, i, 0xffffffff);
3067                 data = 0;
3068                 for (j = 0; j < sh_per_se; j++) {
3069                         switch (enabled_rbs & 3) {
3070                         case 1:
3071                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3072                                 break;
3073                         case 2:
3074                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3075                                 break;
3076                         case 3:
3077                         default:
3078                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3079                                 break;
3080                         }
3081                         enabled_rbs >>= 2;
3082                 }
3083                 WREG32(PA_SC_RASTER_CONFIG, data);
3084         }
3085         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3086 }
3087
/*
 * si_gpu_init - set up the per-ASIC graphics configuration
 *
 * @rdev: radeon device
 *
 * In order, this function:
 *  - fills rdev->config.si with the limits and FIFO sizes for the ASIC
 *    family and picks the matching golden GB_ADDR_CONFIG value,
 *  - zeroes a block of HDP registers and programs GRBM/SRBM/BIF basics,
 *  - derives the memory row size from MC_ARB_RAMCFG and patches it into
 *    gb_addr_config,
 *  - builds the rdev->config.si.tile_config dword,
 *  - writes gb_addr_config to the GB, DMIF, HDP, DMA and (if present) UVD
 *    address config registers,
 *  - initializes the tiling mode table, the render backends and the SPI,
 *    and counts the active CUs,
 *  - programs default values for the 3D engine registers.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
        u32 gb_addr_config = 0;
        u32 mc_arb_ramcfg;
        u32 sx_debug_1;
        u32 hdp_host_path_cntl;
        u32 tmp;
        int i, j;

        /*
         * Per-family limits.  Note that "default" shares the CHIP_VERDE
         * case, so any family not listed here gets the Verde settings.
         */
        switch (rdev->family) {
        case CHIP_TAHITI:
                rdev->config.si.max_shader_engines = 2;
                rdev->config.si.max_tile_pipes = 12;
                rdev->config.si.max_cu_per_sh = 8;
                rdev->config.si.max_sh_per_se = 2;
                rdev->config.si.max_backends_per_se = 4;
                rdev->config.si.max_texture_channel_caches = 12;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 32;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x100;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_PITCAIRN:
                rdev->config.si.max_shader_engines = 2;
                rdev->config.si.max_tile_pipes = 8;
                rdev->config.si.max_cu_per_sh = 5;
                rdev->config.si.max_sh_per_se = 2;
                rdev->config.si.max_backends_per_se = 4;
                rdev->config.si.max_texture_channel_caches = 8;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 32;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x100;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_VERDE:
        default:
                rdev->config.si.max_shader_engines = 1;
                rdev->config.si.max_tile_pipes = 4;
                rdev->config.si.max_cu_per_sh = 5;
                rdev->config.si.max_sh_per_se = 2;
                rdev->config.si.max_backends_per_se = 4;
                rdev->config.si.max_texture_channel_caches = 4;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 32;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x40;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_OLAND:
                rdev->config.si.max_shader_engines = 1;
                rdev->config.si.max_tile_pipes = 4;
                rdev->config.si.max_cu_per_sh = 6;
                rdev->config.si.max_sh_per_se = 1;
                rdev->config.si.max_backends_per_se = 2;
                rdev->config.si.max_texture_channel_caches = 4;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 16;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x40;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_HAINAN:
                rdev->config.si.max_shader_engines = 1;
                rdev->config.si.max_tile_pipes = 4;
                rdev->config.si.max_cu_per_sh = 5;
                rdev->config.si.max_sh_per_se = 1;
                rdev->config.si.max_backends_per_se = 1;
                rdev->config.si.max_texture_channel_caches = 2;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 16;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x40;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* Initialize HDP */
        /* 32 groups at a stride of 0x18 bytes, five registers zeroed per group */
        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
                WREG32((0x2c14 + j), 0x00000000);
                WREG32((0x2c18 + j), 0x00000000);
                WREG32((0x2c1c + j), 0x00000000);
                WREG32((0x2c20 + j), 0x00000000);
                WREG32((0x2c24 + j), 0x00000000);
        }

        WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
        WREG32(SRBM_INT_CNTL, 1);
        WREG32(SRBM_INT_ACK, 1);

        evergreen_fix_pci_max_read_req_size(rdev);

        WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

        /* the value read from MC_SHARED_CHMAP is not used */
        RREG32(MC_SHARED_CHMAP);
        mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

        rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
        rdev->config.si.mem_max_burst_length_bytes = 256;
        /* row size in KB from the NOOFCOLS field, capped at 4 */
        tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
        rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
        if (rdev->config.si.mem_row_size_in_kb > 4)
                rdev->config.si.mem_row_size_in_kb = 4;
        /* XXX use MC settings? */
        rdev->config.si.shader_engine_tile_size = 32;
        rdev->config.si.num_gpus = 1;
        rdev->config.si.multi_gpu_tile_size = 64;

        /* fix up row size */
        /* replace the golden ROW_SIZE field; anything but 2 or 4 KB encodes as 0 */
        gb_addr_config &= ~ROW_SIZE_MASK;
        switch (rdev->config.si.mem_row_size_in_kb) {
        case 1:
        default:
                gb_addr_config |= ROW_SIZE(0);
                break;
        case 2:
                gb_addr_config |= ROW_SIZE(1);
                break;
        case 4:
                gb_addr_config |= ROW_SIZE(2);
                break;
        }

        /* setup tiling info dword.  gb_addr_config is not adequate since it does
         * not have bank info, so create a custom tiling dword.
         * bits 3:0   num_pipes
         * bits 7:4   num_banks
         * bits 11:8  group_size
         * bits 15:12 row_size
         */
        rdev->config.si.tile_config = 0;
        /*
         * num_pipes is encoded as log2 for 1/2/4/8.  Any other count,
         * including the 12 set for CHIP_TAHITI above, takes the default
         * branch and is encoded like 8.
         */
        switch (rdev->config.si.num_tile_pipes) {
        case 1:
                rdev->config.si.tile_config |= (0 << 0);
                break;
        case 2:
                rdev->config.si.tile_config |= (1 << 0);
                break;
        case 4:
                rdev->config.si.tile_config |= (2 << 0);
                break;
        case 8:
        default:
                /* XXX what about 12? */
                rdev->config.si.tile_config |= (3 << 0);
                break;
        }
        /* num_banks comes from the NOOFBANK field of MC_ARB_RAMCFG */
        switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
        case 0: /* four banks */
                rdev->config.si.tile_config |= 0 << 4;
                break;
        case 1: /* eight banks */
                rdev->config.si.tile_config |= 1 << 4;
                break;
        case 2: /* sixteen banks */
        default:
                rdev->config.si.tile_config |= 2 << 4;
                break;
        }
        /* group_size and row_size are copied from the gb_addr_config fields */
        rdev->config.si.tile_config |=
                ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
        rdev->config.si.tile_config |=
                ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

        /* the same address config value is written to every consumer block */
        WREG32(GB_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CALC, gb_addr_config);
        WREG32(HDP_ADDR_CONFIG, gb_addr_config);
        WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
        WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
        if (rdev->has_uvd) {
                WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
                WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
                WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
        }

        si_tiling_mode_table_init(rdev);

        si_setup_rb(rdev, rdev->config.si.max_shader_engines,
                    rdev->config.si.max_sh_per_se,
                    rdev->config.si.max_backends_per_se);

        si_setup_spi(rdev, rdev->config.si.max_shader_engines,
                     rdev->config.si.max_sh_per_se,
                     rdev->config.si.max_cu_per_sh);

        /* total active CUs = sum of set bits over every SE/SH bitmap */
        rdev->config.si.active_cus = 0;
        for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
                for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
                        rdev->config.si.active_cus +=
                                hweight32(si_get_cu_active_bitmap(rdev, i, j));
                }
        }

        /* set HW defaults for 3D engine */
        WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
                                     ROQ_IB2_START(0x2b)));
        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

        /* SX_DEBUG_1 is read and written back unmodified */
        sx_debug_1 = RREG32(SX_DEBUG_1);
        WREG32(SX_DEBUG_1, sx_debug_1);

        WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

        /* FIFO sizes chosen per family in the switch at the top */
        WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
                                 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

        WREG32(VGT_NUM_INSTANCES, 1);

        WREG32(CP_PERFMON_CNTL, 0);

        WREG32(SQ_CONFIG, 0);

        WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
                                          FORCE_EOV_MAX_REZ_CNT(255)));

        WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
               AUTO_INVLD_EN(ES_AND_GS_AUTO));

        WREG32(VGT_GS_VERTEX_REUSE, 16);
        WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

        /* clear all CB performance counter selects */
        WREG32(CB_PERFCOUNTER0_SELECT0, 0);
        WREG32(CB_PERFCOUNTER0_SELECT1, 0);
        WREG32(CB_PERFCOUNTER1_SELECT0, 0);
        WREG32(CB_PERFCOUNTER1_SELECT1, 0);
        WREG32(CB_PERFCOUNTER2_SELECT0, 0);
        WREG32(CB_PERFCOUNTER2_SELECT1, 0);
        WREG32(CB_PERFCOUNTER3_SELECT0, 0);
        WREG32(CB_PERFCOUNTER3_SELECT1, 0);

        /* read-modify-write: set HDP_FLUSH_INVALIDATE_CACHE, keep other bits */
        tmp = RREG32(HDP_MISC_CNTL);
        tmp |= HDP_FLUSH_INVALIDATE_CACHE;
        WREG32(HDP_MISC_CNTL, tmp);

        /* HDP_HOST_PATH_CNTL is read and written back unmodified */
        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

        WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

        udelay(50);
}
3353
/*
 * GPU scratch register helper functions.
 */
3357 static void si_scratch_init(struct radeon_device *rdev)
3358 {
3359         int i;
3360
3361         rdev->scratch.num_reg = 7;
3362         rdev->scratch.reg_base = SCRATCH_REG0;
3363         for (i = 0; i < rdev->scratch.num_reg; i++) {
3364                 rdev->scratch.free[i] = true;
3365                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3366         }
3367 }
3368
3369 void si_fence_ring_emit(struct radeon_device *rdev,
3370                         struct radeon_fence *fence)
3371 {
3372         struct radeon_ring *ring = &rdev->ring[fence->ring];
3373         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3374
3375         /* flush read cache over gart */
3376         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3377         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3378         radeon_ring_write(ring, 0);
3379         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3380         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3381                           PACKET3_TC_ACTION_ENA |
3382                           PACKET3_SH_KCACHE_ACTION_ENA |
3383                           PACKET3_SH_ICACHE_ACTION_ENA);
3384         radeon_ring_write(ring, 0xFFFFFFFF);
3385         radeon_ring_write(ring, 0);
3386         radeon_ring_write(ring, 10); /* poll interval */
3387         /* EVENT_WRITE_EOP - flush caches, send int */
3388         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3389         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3390         radeon_ring_write(ring, lower_32_bits(addr));
3391         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3392         radeon_ring_write(ring, fence->seq);
3393         radeon_ring_write(ring, 0);
3394 }
3395
3396 /*
3397  * IB stuff
3398  */
3399 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3400 {
3401         struct radeon_ring *ring = &rdev->ring[ib->ring];
3402         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3403         u32 header;
3404
3405         if (ib->is_const_ib) {
3406                 /* set switch buffer packet before const IB */
3407                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3408                 radeon_ring_write(ring, 0);
3409
3410                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3411         } else {
3412                 u32 next_rptr;
3413                 if (ring->rptr_save_reg) {
3414                         next_rptr = ring->wptr + 3 + 4 + 8;
3415                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3416                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3417                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3418                         radeon_ring_write(ring, next_rptr);
3419                 } else if (rdev->wb.enabled) {
3420                         next_rptr = ring->wptr + 5 + 4 + 8;
3421                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3422                         radeon_ring_write(ring, (1 << 8));
3423                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3424                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3425                         radeon_ring_write(ring, next_rptr);
3426                 }
3427
3428                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3429         }
3430
3431         radeon_ring_write(ring, header);
3432         radeon_ring_write(ring,
3433 #ifdef __BIG_ENDIAN
3434                           (2 << 0) |
3435 #endif
3436                           (ib->gpu_addr & 0xFFFFFFFC));
3437         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3438         radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3439
3440         if (!ib->is_const_ib) {
3441                 /* flush read cache over gart for this vmid */
3442                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3443                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3444                 radeon_ring_write(ring, vm_id);
3445                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3446                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3447                                   PACKET3_TC_ACTION_ENA |
3448                                   PACKET3_SH_KCACHE_ACTION_ENA |
3449                                   PACKET3_SH_ICACHE_ACTION_ENA);
3450                 radeon_ring_write(ring, 0xFFFFFFFF);
3451                 radeon_ring_write(ring, 0);
3452                 radeon_ring_write(ring, 10); /* poll interval */
3453         }
3454 }
3455
3456 /*
3457  * CP.
3458  */
3459 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3460 {
3461         if (enable)
3462                 WREG32(CP_ME_CNTL, 0);
3463         else {
3464                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3465                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3466                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3467                 WREG32(SCRATCH_UMSK, 0);
3468                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3469                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3470                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3471         }
3472         udelay(50);
3473 }
3474
3475 static int si_cp_load_microcode(struct radeon_device *rdev)
3476 {
3477         int i;
3478
3479         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3480                 return -EINVAL;
3481
3482         si_cp_enable(rdev, false);
3483
3484         if (rdev->new_fw) {
3485                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3486                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3487                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3488                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3489                 const struct gfx_firmware_header_v1_0 *me_hdr =
3490                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3491                 const __le32 *fw_data;
3492                 u32 fw_size;
3493
3494                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3495                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3496                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3497
3498                 /* PFP */
3499                 fw_data = (const __le32 *)
3500                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3501                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3502                 WREG32(CP_PFP_UCODE_ADDR, 0);
3503                 for (i = 0; i < fw_size; i++)
3504                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3505                 WREG32(CP_PFP_UCODE_ADDR, 0);
3506
3507                 /* CE */
3508                 fw_data = (const __le32 *)
3509                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3510                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3511                 WREG32(CP_CE_UCODE_ADDR, 0);
3512                 for (i = 0; i < fw_size; i++)
3513                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3514                 WREG32(CP_CE_UCODE_ADDR, 0);
3515
3516                 /* ME */
3517                 fw_data = (const __be32 *)
3518                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3519                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3520                 WREG32(CP_ME_RAM_WADDR, 0);
3521                 for (i = 0; i < fw_size; i++)
3522                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3523                 WREG32(CP_ME_RAM_WADDR, 0);
3524         } else {
3525                 const __be32 *fw_data;
3526
3527                 /* PFP */
3528                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3529                 WREG32(CP_PFP_UCODE_ADDR, 0);
3530                 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3531                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3532                 WREG32(CP_PFP_UCODE_ADDR, 0);
3533
3534                 /* CE */
3535                 fw_data = (const __be32 *)rdev->ce_fw->data;
3536                 WREG32(CP_CE_UCODE_ADDR, 0);
3537                 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3538                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3539                 WREG32(CP_CE_UCODE_ADDR, 0);
3540
3541                 /* ME */
3542                 fw_data = (const __be32 *)rdev->me_fw->data;
3543                 WREG32(CP_ME_RAM_WADDR, 0);
3544                 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3545                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3546                 WREG32(CP_ME_RAM_WADDR, 0);
3547         }
3548
3549         WREG32(CP_PFP_UCODE_ADDR, 0);
3550         WREG32(CP_CE_UCODE_ADDR, 0);
3551         WREG32(CP_ME_RAM_WADDR, 0);
3552         WREG32(CP_ME_RAM_RADDR, 0);
3553         return 0;
3554 }
3555
3556 static int si_cp_start(struct radeon_device *rdev)
3557 {
3558         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3559         int r, i;
3560
3561         r = radeon_ring_lock(rdev, ring, 7 + 4);
3562         if (r) {
3563                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3564                 return r;
3565         }
3566         /* init the CP */
3567         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3568         radeon_ring_write(ring, 0x1);
3569         radeon_ring_write(ring, 0x0);
3570         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3571         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3572         radeon_ring_write(ring, 0);
3573         radeon_ring_write(ring, 0);
3574
3575         /* init the CE partitions */
3576         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3577         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3578         radeon_ring_write(ring, 0xc000);
3579         radeon_ring_write(ring, 0xe000);
3580         radeon_ring_unlock_commit(rdev, ring, false);
3581
3582         si_cp_enable(rdev, true);
3583
3584         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3585         if (r) {
3586                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3587                 return r;
3588         }
3589
3590         /* setup clear context state */
3591         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3592         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3593
3594         for (i = 0; i < si_default_size; i++)
3595                 radeon_ring_write(ring, si_default_state[i]);
3596
3597         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3598         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3599
3600         /* set clear context state */
3601         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3602         radeon_ring_write(ring, 0);
3603
3604         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3605         radeon_ring_write(ring, 0x00000316);
3606         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3607         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3608
3609         radeon_ring_unlock_commit(rdev, ring, false);
3610
3611         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3612                 ring = &rdev->ring[i];
3613                 r = radeon_ring_lock(rdev, ring, 2);
3614
3615                 /* clear the compute context state */
3616                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3617                 radeon_ring_write(ring, 0);
3618
3619                 radeon_ring_unlock_commit(rdev, ring, false);
3620         }
3621
3622         return 0;
3623 }
3624
3625 static void si_cp_fini(struct radeon_device *rdev)
3626 {
3627         struct radeon_ring *ring;
3628         si_cp_enable(rdev, false);
3629
3630         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3631         radeon_ring_fini(rdev, ring);
3632         radeon_scratch_free(rdev, ring->rptr_save_reg);
3633
3634         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3635         radeon_ring_fini(rdev, ring);
3636         radeon_scratch_free(rdev, ring->rptr_save_reg);
3637
3638         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3639         radeon_ring_fini(rdev, ring);
3640         radeon_scratch_free(rdev, ring->rptr_save_reg);
3641 }
3642
3643 static int si_cp_resume(struct radeon_device *rdev)
3644 {
3645         struct radeon_ring *ring;
3646         u32 tmp;
3647         u32 rb_bufsz;
3648         int r;
3649
3650         si_enable_gui_idle_interrupt(rdev, false);
3651
3652         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3653         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3654
3655         /* Set the write pointer delay */
3656         WREG32(CP_RB_WPTR_DELAY, 0);
3657
3658         WREG32(CP_DEBUG, 0);
3659         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3660
3661         /* ring 0 - compute and gfx */
3662         /* Set ring buffer size */
3663         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3664         rb_bufsz = order_base_2(ring->ring_size / 8);
3665         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3666 #ifdef __BIG_ENDIAN
3667         tmp |= BUF_SWAP_32BIT;
3668 #endif
3669         WREG32(CP_RB0_CNTL, tmp);
3670
3671         /* Initialize the ring buffer's read and write pointers */
3672         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3673         ring->wptr = 0;
3674         WREG32(CP_RB0_WPTR, ring->wptr);
3675
3676         /* set the wb address whether it's enabled or not */
3677         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3678         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3679
3680         if (rdev->wb.enabled)
3681                 WREG32(SCRATCH_UMSK, 0xff);
3682         else {
3683                 tmp |= RB_NO_UPDATE;
3684                 WREG32(SCRATCH_UMSK, 0);
3685         }
3686
3687         mdelay(1);
3688         WREG32(CP_RB0_CNTL, tmp);
3689
3690         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3691
3692         /* ring1  - compute only */
3693         /* Set ring buffer size */
3694         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3695         rb_bufsz = order_base_2(ring->ring_size / 8);
3696         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3697 #ifdef __BIG_ENDIAN
3698         tmp |= BUF_SWAP_32BIT;
3699 #endif
3700         WREG32(CP_RB1_CNTL, tmp);
3701
3702         /* Initialize the ring buffer's read and write pointers */
3703         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3704         ring->wptr = 0;
3705         WREG32(CP_RB1_WPTR, ring->wptr);
3706
3707         /* set the wb address whether it's enabled or not */
3708         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3709         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3710
3711         mdelay(1);
3712         WREG32(CP_RB1_CNTL, tmp);
3713
3714         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3715
3716         /* ring2 - compute only */
3717         /* Set ring buffer size */
3718         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3719         rb_bufsz = order_base_2(ring->ring_size / 8);
3720         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3721 #ifdef __BIG_ENDIAN
3722         tmp |= BUF_SWAP_32BIT;
3723 #endif
3724         WREG32(CP_RB2_CNTL, tmp);
3725
3726         /* Initialize the ring buffer's read and write pointers */
3727         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3728         ring->wptr = 0;
3729         WREG32(CP_RB2_WPTR, ring->wptr);
3730
3731         /* set the wb address whether it's enabled or not */
3732         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3733         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3734
3735         mdelay(1);
3736         WREG32(CP_RB2_CNTL, tmp);
3737
3738         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3739
3740         /* start the rings */
3741         si_cp_start(rdev);
3742         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3743         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3744         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3745         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3746         if (r) {
3747                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3748                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3749                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3750                 return r;
3751         }
3752         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3753         if (r) {
3754                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3755         }
3756         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3757         if (r) {
3758                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3759         }
3760
3761         si_enable_gui_idle_interrupt(rdev, true);
3762
3763         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3764                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3765
3766         return 0;
3767 }
3768
3769 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3770 {
3771         u32 reset_mask = 0;
3772         u32 tmp;
3773
3774         /* GRBM_STATUS */
3775         tmp = RREG32(GRBM_STATUS);
3776         if (tmp & (PA_BUSY | SC_BUSY |
3777                    BCI_BUSY | SX_BUSY |
3778                    TA_BUSY | VGT_BUSY |
3779                    DB_BUSY | CB_BUSY |
3780                    GDS_BUSY | SPI_BUSY |
3781                    IA_BUSY | IA_BUSY_NO_DMA))
3782                 reset_mask |= RADEON_RESET_GFX;
3783
3784         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3785                    CP_BUSY | CP_COHERENCY_BUSY))
3786                 reset_mask |= RADEON_RESET_CP;
3787
3788         if (tmp & GRBM_EE_BUSY)
3789                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3790
3791         /* GRBM_STATUS2 */
3792         tmp = RREG32(GRBM_STATUS2);
3793         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3794                 reset_mask |= RADEON_RESET_RLC;
3795
3796         /* DMA_STATUS_REG 0 */
3797         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3798         if (!(tmp & DMA_IDLE))
3799                 reset_mask |= RADEON_RESET_DMA;
3800
3801         /* DMA_STATUS_REG 1 */
3802         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3803         if (!(tmp & DMA_IDLE))
3804                 reset_mask |= RADEON_RESET_DMA1;
3805
3806         /* SRBM_STATUS2 */
3807         tmp = RREG32(SRBM_STATUS2);
3808         if (tmp & DMA_BUSY)
3809                 reset_mask |= RADEON_RESET_DMA;
3810
3811         if (tmp & DMA1_BUSY)
3812                 reset_mask |= RADEON_RESET_DMA1;
3813
3814         /* SRBM_STATUS */
3815         tmp = RREG32(SRBM_STATUS);
3816
3817         if (tmp & IH_BUSY)
3818                 reset_mask |= RADEON_RESET_IH;
3819
3820         if (tmp & SEM_BUSY)
3821                 reset_mask |= RADEON_RESET_SEM;
3822
3823         if (tmp & GRBM_RQ_PENDING)
3824                 reset_mask |= RADEON_RESET_GRBM;
3825
3826         if (tmp & VMC_BUSY)
3827                 reset_mask |= RADEON_RESET_VMC;
3828
3829         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3830                    MCC_BUSY | MCD_BUSY))
3831                 reset_mask |= RADEON_RESET_MC;
3832
3833         if (evergreen_is_display_hung(rdev))
3834                 reset_mask |= RADEON_RESET_DISPLAY;
3835
3836         /* VM_L2_STATUS */
3837         tmp = RREG32(VM_L2_STATUS);
3838         if (tmp & L2_BUSY)
3839                 reset_mask |= RADEON_RESET_VMC;
3840
3841         /* Skip MC reset as it's mostly likely not hung, just busy */
3842         if (reset_mask & RADEON_RESET_MC) {
3843                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3844                 reset_mask &= ~RADEON_RESET_MC;
3845         }
3846
3847         return reset_mask;
3848 }
3849
3850 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3851 {
3852         struct evergreen_mc_save save;
3853         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3854         u32 tmp;
3855
3856         if (reset_mask == 0)
3857                 return;
3858
3859         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3860
3861         evergreen_print_gpu_status_regs(rdev);
3862         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3863                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3864         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3865                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3866
3867         /* disable PG/CG */
3868         si_fini_pg(rdev);
3869         si_fini_cg(rdev);
3870
3871         /* stop the rlc */
3872         si_rlc_stop(rdev);
3873
3874         /* Disable CP parsing/prefetching */
3875         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3876
3877         if (reset_mask & RADEON_RESET_DMA) {
3878                 /* dma0 */
3879                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3880                 tmp &= ~DMA_RB_ENABLE;
3881                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3882         }
3883         if (reset_mask & RADEON_RESET_DMA1) {
3884                 /* dma1 */
3885                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3886                 tmp &= ~DMA_RB_ENABLE;
3887                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3888         }
3889
3890         udelay(50);
3891
3892         evergreen_mc_stop(rdev, &save);
3893         if (evergreen_mc_wait_for_idle(rdev)) {
3894                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3895         }
3896
3897         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3898                 grbm_soft_reset = SOFT_RESET_CB |
3899                         SOFT_RESET_DB |
3900                         SOFT_RESET_GDS |
3901                         SOFT_RESET_PA |
3902                         SOFT_RESET_SC |
3903                         SOFT_RESET_BCI |
3904                         SOFT_RESET_SPI |
3905                         SOFT_RESET_SX |
3906                         SOFT_RESET_TC |
3907                         SOFT_RESET_TA |
3908                         SOFT_RESET_VGT |
3909                         SOFT_RESET_IA;
3910         }
3911
3912         if (reset_mask & RADEON_RESET_CP) {
3913                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3914
3915                 srbm_soft_reset |= SOFT_RESET_GRBM;
3916         }
3917
3918         if (reset_mask & RADEON_RESET_DMA)
3919                 srbm_soft_reset |= SOFT_RESET_DMA;
3920
3921         if (reset_mask & RADEON_RESET_DMA1)
3922                 srbm_soft_reset |= SOFT_RESET_DMA1;
3923
3924         if (reset_mask & RADEON_RESET_DISPLAY)
3925                 srbm_soft_reset |= SOFT_RESET_DC;
3926
3927         if (reset_mask & RADEON_RESET_RLC)
3928                 grbm_soft_reset |= SOFT_RESET_RLC;
3929
3930         if (reset_mask & RADEON_RESET_SEM)
3931                 srbm_soft_reset |= SOFT_RESET_SEM;
3932
3933         if (reset_mask & RADEON_RESET_IH)
3934                 srbm_soft_reset |= SOFT_RESET_IH;
3935
3936         if (reset_mask & RADEON_RESET_GRBM)
3937                 srbm_soft_reset |= SOFT_RESET_GRBM;
3938
3939         if (reset_mask & RADEON_RESET_VMC)
3940                 srbm_soft_reset |= SOFT_RESET_VMC;
3941
3942         if (reset_mask & RADEON_RESET_MC)
3943                 srbm_soft_reset |= SOFT_RESET_MC;
3944
3945         if (grbm_soft_reset) {
3946                 tmp = RREG32(GRBM_SOFT_RESET);
3947                 tmp |= grbm_soft_reset;
3948                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3949                 WREG32(GRBM_SOFT_RESET, tmp);
3950                 tmp = RREG32(GRBM_SOFT_RESET);
3951
3952                 udelay(50);
3953
3954                 tmp &= ~grbm_soft_reset;
3955                 WREG32(GRBM_SOFT_RESET, tmp);
3956                 tmp = RREG32(GRBM_SOFT_RESET);
3957         }
3958
3959         if (srbm_soft_reset) {
3960                 tmp = RREG32(SRBM_SOFT_RESET);
3961                 tmp |= srbm_soft_reset;
3962                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3963                 WREG32(SRBM_SOFT_RESET, tmp);
3964                 tmp = RREG32(SRBM_SOFT_RESET);
3965
3966                 udelay(50);
3967
3968                 tmp &= ~srbm_soft_reset;
3969                 WREG32(SRBM_SOFT_RESET, tmp);
3970                 tmp = RREG32(SRBM_SOFT_RESET);
3971         }
3972
3973         /* Wait a little for things to settle down */
3974         udelay(50);
3975
3976         evergreen_mc_resume(rdev, &save);
3977         udelay(50);
3978
3979         evergreen_print_gpu_status_regs(rdev);
3980 }
3981
3982 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3983 {
3984         u32 tmp, i;
3985
3986         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3987         tmp |= SPLL_BYPASS_EN;
3988         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3989
3990         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3991         tmp |= SPLL_CTLREQ_CHG;
3992         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3993
3994         for (i = 0; i < rdev->usec_timeout; i++) {
3995                 if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3996                         break;
3997                 udelay(1);
3998         }
3999
4000         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4001         tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4002         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4003
4004         tmp = RREG32(MPLL_CNTL_MODE);
4005         tmp &= ~MPLL_MCLK_SEL;
4006         WREG32(MPLL_CNTL_MODE, tmp);
4007 }
4008
4009 static void si_spll_powerdown(struct radeon_device *rdev)
4010 {
4011         u32 tmp;
4012
4013         tmp = RREG32(SPLL_CNTL_MODE);
4014         tmp |= SPLL_SW_DIR_CONTROL;
4015         WREG32(SPLL_CNTL_MODE, tmp);
4016
4017         tmp = RREG32(CG_SPLL_FUNC_CNTL);
4018         tmp |= SPLL_RESET;
4019         WREG32(CG_SPLL_FUNC_CNTL, tmp);
4020
4021         tmp = RREG32(CG_SPLL_FUNC_CNTL);
4022         tmp |= SPLL_SLEEP;
4023         WREG32(CG_SPLL_FUNC_CNTL, tmp);
4024
4025         tmp = RREG32(SPLL_CNTL_MODE);
4026         tmp &= ~SPLL_SW_DIR_CONTROL;
4027         WREG32(SPLL_CNTL_MODE, tmp);
4028 }
4029
4030 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4031 {
4032         struct evergreen_mc_save save;
4033         u32 tmp, i;
4034
4035         dev_info(rdev->dev, "GPU pci config reset\n");
4036
4037         /* disable dpm? */
4038
4039         /* disable cg/pg */
4040         si_fini_pg(rdev);
4041         si_fini_cg(rdev);
4042
4043         /* Disable CP parsing/prefetching */
4044         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4045         /* dma0 */
4046         tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4047         tmp &= ~DMA_RB_ENABLE;
4048         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4049         /* dma1 */
4050         tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4051         tmp &= ~DMA_RB_ENABLE;
4052         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4053         /* XXX other engines? */
4054
4055         /* halt the rlc, disable cp internal ints */
4056         si_rlc_stop(rdev);
4057
4058         udelay(50);
4059
4060         /* disable mem access */
4061         evergreen_mc_stop(rdev, &save);
4062         if (evergreen_mc_wait_for_idle(rdev)) {
4063                 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4064         }
4065
4066         /* set mclk/sclk to bypass */
4067         si_set_clk_bypass_mode(rdev);
4068         /* powerdown spll */
4069         si_spll_powerdown(rdev);
4070         /* disable BM */
4071         pci_clear_master(rdev->pdev);
4072         /* reset */
4073         radeon_pci_config_reset(rdev);
4074         /* wait for asic to come out of reset */
4075         for (i = 0; i < rdev->usec_timeout; i++) {
4076                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4077                         break;
4078                 udelay(1);
4079         }
4080 }
4081
4082 int si_asic_reset(struct radeon_device *rdev, bool hard)
4083 {
4084         u32 reset_mask;
4085
4086         if (hard) {
4087                 si_gpu_pci_config_reset(rdev);
4088                 return 0;
4089         }
4090
4091         reset_mask = si_gpu_check_soft_reset(rdev);
4092
4093         if (reset_mask)
4094                 r600_set_bios_scratch_engine_hung(rdev, true);
4095
4096         /* try soft reset */
4097         si_gpu_soft_reset(rdev, reset_mask);
4098
4099         reset_mask = si_gpu_check_soft_reset(rdev);
4100
4101         /* try pci config reset */
4102         if (reset_mask && radeon_hard_reset)
4103                 si_gpu_pci_config_reset(rdev);
4104
4105         reset_mask = si_gpu_check_soft_reset(rdev);
4106
4107         if (!reset_mask)
4108                 r600_set_bios_scratch_engine_hung(rdev, false);
4109
4110         return 0;
4111 }
4112
4113 /**
4114  * si_gfx_is_lockup - Check if the GFX engine is locked up
4115  *
4116  * @rdev: radeon_device pointer
4117  * @ring: radeon_ring structure holding ring information
4118  *
4119  * Check if the GFX engine is locked up.
4120  * Returns true if the engine appears to be locked up, false if not.
4121  */
4122 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4123 {
4124         u32 reset_mask = si_gpu_check_soft_reset(rdev);
4125
4126         if (!(reset_mask & (RADEON_RESET_GFX |
4127                             RADEON_RESET_COMPUTE |
4128                             RADEON_RESET_CP))) {
4129                 radeon_ring_lockup_update(rdev, ring);
4130                 return false;
4131         }
4132         return radeon_ring_test_lockup(rdev, ring);
4133 }
4134
4135 /* MC */
4136 static void si_mc_program(struct radeon_device *rdev)
4137 {
4138         struct evergreen_mc_save save;
4139         u32 tmp;
4140         int i, j;
4141
4142         /* Initialize HDP */
4143         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4144                 WREG32((0x2c14 + j), 0x00000000);
4145                 WREG32((0x2c18 + j), 0x00000000);
4146                 WREG32((0x2c1c + j), 0x00000000);
4147                 WREG32((0x2c20 + j), 0x00000000);
4148                 WREG32((0x2c24 + j), 0x00000000);
4149         }
4150         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4151
4152         evergreen_mc_stop(rdev, &save);
4153         if (radeon_mc_wait_for_idle(rdev)) {
4154                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4155         }
4156         if (!ASIC_IS_NODCE(rdev))
4157                 /* Lockout access through VGA aperture*/
4158                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4159         /* Update configuration */
4160         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4161                rdev->mc.vram_start >> 12);
4162         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4163                rdev->mc.vram_end >> 12);
4164         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4165                rdev->vram_scratch.gpu_addr >> 12);
4166         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4167         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4168         WREG32(MC_VM_FB_LOCATION, tmp);
4169         /* XXX double check these! */
4170         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4171         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4172         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4173         WREG32(MC_VM_AGP_BASE, 0);
4174         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4175         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4176         if (radeon_mc_wait_for_idle(rdev)) {
4177                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4178         }
4179         evergreen_mc_resume(rdev, &save);
4180         if (!ASIC_IS_NODCE(rdev)) {
4181                 /* we need to own VRAM, so turn off the VGA renderer here
4182                  * to stop it overwriting our objects */
4183                 rv515_vga_render_disable(rdev);
4184         }
4185 }
4186
4187 void si_vram_gtt_location(struct radeon_device *rdev,
4188                           struct radeon_mc *mc)
4189 {
4190         if (mc->mc_vram_size > 0xFFC0000000ULL) {
4191                 /* leave room for at least 1024M GTT */
4192                 dev_warn(rdev->dev, "limiting VRAM\n");
4193                 mc->real_vram_size = 0xFFC0000000ULL;
4194                 mc->mc_vram_size = 0xFFC0000000ULL;
4195         }
4196         radeon_vram_location(rdev, &rdev->mc, 0);
4197         rdev->mc.gtt_base_align = 0;
4198         radeon_gtt_location(rdev, mc);
4199 }
4200
4201 static int si_mc_init(struct radeon_device *rdev)
4202 {
4203         u32 tmp;
4204         int chansize, numchan;
4205
4206         /* Get VRAM informations */
4207         rdev->mc.vram_is_ddr = true;
4208         tmp = RREG32(MC_ARB_RAMCFG);
4209         if (tmp & CHANSIZE_OVERRIDE) {
4210                 chansize = 16;
4211         } else if (tmp & CHANSIZE_MASK) {
4212                 chansize = 64;
4213         } else {
4214                 chansize = 32;
4215         }
4216         tmp = RREG32(MC_SHARED_CHMAP);
4217         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4218         case 0:
4219         default:
4220                 numchan = 1;
4221                 break;
4222         case 1:
4223                 numchan = 2;
4224                 break;
4225         case 2:
4226                 numchan = 4;
4227                 break;
4228         case 3:
4229                 numchan = 8;
4230                 break;
4231         case 4:
4232                 numchan = 3;
4233                 break;
4234         case 5:
4235                 numchan = 6;
4236                 break;
4237         case 6:
4238                 numchan = 10;
4239                 break;
4240         case 7:
4241                 numchan = 12;
4242                 break;
4243         case 8:
4244                 numchan = 16;
4245                 break;
4246         }
4247         rdev->mc.vram_width = numchan * chansize;
4248         /* Could aper size report 0 ? */
4249         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4250         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4251         /* size in MB on si */
4252         tmp = RREG32(CONFIG_MEMSIZE);
4253         /* some boards may have garbage in the upper 16 bits */
4254         if (tmp & 0xffff0000) {
4255                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4256                 if (tmp & 0xffff)
4257                         tmp &= 0xffff;
4258         }
4259         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4260         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4261         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4262         si_vram_gtt_location(rdev, &rdev->mc);
4263         radeon_update_bandwidth_info(rdev);
4264
4265         return 0;
4266 }
4267
4268 /*
4269  * GART
4270  */
4271 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4272 {
4273         /* flush hdp cache */
4274         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4275
4276         /* bits 0-15 are the VM contexts0-15 */
4277         WREG32(VM_INVALIDATE_REQUEST, 1);
4278 }
4279
4280 static int si_pcie_gart_enable(struct radeon_device *rdev)
4281 {
4282         int r, i;
4283
4284         if (rdev->gart.robj == NULL) {
4285                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4286                 return -EINVAL;
4287         }
4288         r = radeon_gart_table_vram_pin(rdev);
4289         if (r)
4290                 return r;
4291         /* Setup TLB control */
4292         WREG32(MC_VM_MX_L1_TLB_CNTL,
4293                (0xA << 7) |
4294                ENABLE_L1_TLB |
4295                ENABLE_L1_FRAGMENT_PROCESSING |
4296                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4297                ENABLE_ADVANCED_DRIVER_MODEL |
4298                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4299         /* Setup L2 cache */
4300         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4301                ENABLE_L2_FRAGMENT_PROCESSING |
4302                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4303                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4304                EFFECTIVE_L2_QUEUE_SIZE(7) |
4305                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4306         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4307         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4308                BANK_SELECT(4) |
4309                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4310         /* setup context0 */
4311         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4312         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4313         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4314         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4315                         (u32)(rdev->dummy_page.addr >> 12));
4316         WREG32(VM_CONTEXT0_CNTL2, 0);
4317         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4318                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4319
4320         WREG32(0x15D4, 0);
4321         WREG32(0x15D8, 0);
4322         WREG32(0x15DC, 0);
4323
4324         /* empty context1-15 */
4325         /* set vm size, must be a multiple of 4 */
4326         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4327         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4328         /* Assign the pt base to something valid for now; the pts used for
4329          * the VMs are determined by the application and setup and assigned
4330          * on the fly in the vm part of radeon_gart.c
4331          */
4332         for (i = 1; i < 16; i++) {
4333                 if (i < 8)
4334                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4335                                rdev->vm_manager.saved_table_addr[i]);
4336                 else
4337                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4338                                rdev->vm_manager.saved_table_addr[i]);
4339         }
4340
4341         /* enable context1-15 */
4342         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4343                (u32)(rdev->dummy_page.addr >> 12));
4344         WREG32(VM_CONTEXT1_CNTL2, 4);
4345         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4346                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4347                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4348                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4349                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4350                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4351                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4352                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4353                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4354                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4355                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4356                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4357                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4358                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4359
4360         si_pcie_gart_tlb_flush(rdev);
4361         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4362                  (unsigned)(rdev->mc.gtt_size >> 20),
4363                  (unsigned long long)rdev->gart.table_addr);
4364         rdev->gart.ready = true;
4365         return 0;
4366 }
4367
4368 static void si_pcie_gart_disable(struct radeon_device *rdev)
4369 {
4370         unsigned i;
4371
4372         for (i = 1; i < 16; ++i) {
4373                 uint32_t reg;
4374                 if (i < 8)
4375                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4376                 else
4377                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4378                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4379         }
4380
4381         /* Disable all tables */
4382         WREG32(VM_CONTEXT0_CNTL, 0);
4383         WREG32(VM_CONTEXT1_CNTL, 0);
4384         /* Setup TLB control */
4385         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4386                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4387         /* Setup L2 cache */
4388         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4389                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4390                EFFECTIVE_L2_QUEUE_SIZE(7) |
4391                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4392         WREG32(VM_L2_CNTL2, 0);
4393         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4394                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4395         radeon_gart_table_vram_unpin(rdev);
4396 }
4397
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART via si_pcie_gart_disable(), then calls
 * radeon_gart_table_vram_free() and radeon_gart_fini(), in that order.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4404
4405 /* vm parser */
4406 static bool si_vm_reg_valid(u32 reg)
4407 {
4408         /* context regs are fine */
4409         if (reg >= 0x28000)
4410                 return true;
4411
4412         /* shader regs are also fine */
4413         if (reg >= 0xB000 && reg < 0xC000)
4414                 return true;
4415
4416         /* check config regs */
4417         switch (reg) {
4418         case GRBM_GFX_INDEX:
4419         case CP_STRMOUT_CNTL:
4420         case VGT_VTX_VECT_EJECT_REG:
4421         case VGT_CACHE_INVALIDATION:
4422         case VGT_ESGS_RING_SIZE:
4423         case VGT_GSVS_RING_SIZE:
4424         case VGT_GS_VERTEX_REUSE:
4425         case VGT_PRIMITIVE_TYPE:
4426         case VGT_INDEX_TYPE:
4427         case VGT_NUM_INDICES:
4428         case VGT_NUM_INSTANCES:
4429         case VGT_TF_RING_SIZE:
4430         case VGT_HS_OFFCHIP_PARAM:
4431         case VGT_TF_MEMORY_BASE:
4432         case PA_CL_ENHANCE:
4433         case PA_SU_LINE_STIPPLE_VALUE:
4434         case PA_SC_LINE_STIPPLE_STATE:
4435         case PA_SC_ENHANCE:
4436         case SQC_CACHES:
4437         case SPI_STATIC_THREAD_MGMT_1:
4438         case SPI_STATIC_THREAD_MGMT_2:
4439         case SPI_STATIC_THREAD_MGMT_3:
4440         case SPI_PS_MAX_WAVE_ID:
4441         case SPI_CONFIG_CNTL:
4442         case SPI_CONFIG_CNTL_1:
4443         case TA_CNTL_AUX:
4444         case TA_CS_BC_BASE_ADDR:
4445                 return true;
4446         default:
4447                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4448                 return false;
4449         }
4450 }
4451
4452 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4453                                   u32 *ib, struct radeon_cs_packet *pkt)
4454 {
4455         switch (pkt->opcode) {
4456         case PACKET3_NOP:
4457         case PACKET3_SET_BASE:
4458         case PACKET3_SET_CE_DE_COUNTERS:
4459         case PACKET3_LOAD_CONST_RAM:
4460         case PACKET3_WRITE_CONST_RAM:
4461         case PACKET3_WRITE_CONST_RAM_OFFSET:
4462         case PACKET3_DUMP_CONST_RAM:
4463         case PACKET3_INCREMENT_CE_COUNTER:
4464         case PACKET3_WAIT_ON_DE_COUNTER:
4465         case PACKET3_CE_WRITE:
4466                 break;
4467         default:
4468                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4469                 return -EINVAL;
4470         }
4471         return 0;
4472 }
4473
4474 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4475 {
4476         u32 start_reg, reg, i;
4477         u32 command = ib[idx + 4];
4478         u32 info = ib[idx + 1];
4479         u32 idx_value = ib[idx];
4480         if (command & PACKET3_CP_DMA_CMD_SAS) {
4481                 /* src address space is register */
4482                 if (((info & 0x60000000) >> 29) == 0) {
4483                         start_reg = idx_value << 2;
4484                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4485                                 reg = start_reg;
4486                                 if (!si_vm_reg_valid(reg)) {
4487                                         DRM_ERROR("CP DMA Bad SRC register\n");
4488                                         return -EINVAL;
4489                                 }
4490                         } else {
4491                                 for (i = 0; i < (command & 0x1fffff); i++) {
4492                                         reg = start_reg + (4 * i);
4493                                         if (!si_vm_reg_valid(reg)) {
4494                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4495                                                 return -EINVAL;
4496                                         }
4497                                 }
4498                         }
4499                 }
4500         }
4501         if (command & PACKET3_CP_DMA_CMD_DAS) {
4502                 /* dst address space is register */
4503                 if (((info & 0x00300000) >> 20) == 0) {
4504                         start_reg = ib[idx + 2];
4505                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4506                                 reg = start_reg;
4507                                 if (!si_vm_reg_valid(reg)) {
4508                                         DRM_ERROR("CP DMA Bad DST register\n");
4509                                         return -EINVAL;
4510                                 }
4511                         } else {
4512                                 for (i = 0; i < (command & 0x1fffff); i++) {
4513                                         reg = start_reg + (4 * i);
4514                                         if (!si_vm_reg_valid(reg)) {
4515                                                 DRM_ERROR("CP DMA Bad DST register\n");
4516                                                 return -EINVAL;
4517                                         }
4518                                 }
4519                         }
4520                 }
4521         }
4522         return 0;
4523 }
4524
4525 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4526                                    u32 *ib, struct radeon_cs_packet *pkt)
4527 {
4528         int r;
4529         u32 idx = pkt->idx + 1;
4530         u32 idx_value = ib[idx];
4531         u32 start_reg, end_reg, reg, i;
4532
4533         switch (pkt->opcode) {
4534         case PACKET3_NOP:
4535         case PACKET3_SET_BASE:
4536         case PACKET3_CLEAR_STATE:
4537         case PACKET3_INDEX_BUFFER_SIZE:
4538         case PACKET3_DISPATCH_DIRECT:
4539         case PACKET3_DISPATCH_INDIRECT:
4540         case PACKET3_ALLOC_GDS:
4541         case PACKET3_WRITE_GDS_RAM:
4542         case PACKET3_ATOMIC_GDS:
4543         case PACKET3_ATOMIC:
4544         case PACKET3_OCCLUSION_QUERY:
4545         case PACKET3_SET_PREDICATION:
4546         case PACKET3_COND_EXEC:
4547         case PACKET3_PRED_EXEC:
4548         case PACKET3_DRAW_INDIRECT:
4549         case PACKET3_DRAW_INDEX_INDIRECT:
4550         case PACKET3_INDEX_BASE:
4551         case PACKET3_DRAW_INDEX_2:
4552         case PACKET3_CONTEXT_CONTROL:
4553         case PACKET3_INDEX_TYPE:
4554         case PACKET3_DRAW_INDIRECT_MULTI:
4555         case PACKET3_DRAW_INDEX_AUTO:
4556         case PACKET3_DRAW_INDEX_IMMD:
4557         case PACKET3_NUM_INSTANCES:
4558         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4559         case PACKET3_STRMOUT_BUFFER_UPDATE:
4560         case PACKET3_DRAW_INDEX_OFFSET_2:
4561         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4562         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4563         case PACKET3_MPEG_INDEX:
4564         case PACKET3_WAIT_REG_MEM:
4565         case PACKET3_MEM_WRITE:
4566         case PACKET3_PFP_SYNC_ME:
4567         case PACKET3_SURFACE_SYNC:
4568         case PACKET3_EVENT_WRITE:
4569         case PACKET3_EVENT_WRITE_EOP:
4570         case PACKET3_EVENT_WRITE_EOS:
4571         case PACKET3_SET_CONTEXT_REG:
4572         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4573         case PACKET3_SET_SH_REG:
4574         case PACKET3_SET_SH_REG_OFFSET:
4575         case PACKET3_INCREMENT_DE_COUNTER:
4576         case PACKET3_WAIT_ON_CE_COUNTER:
4577         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4578         case PACKET3_ME_WRITE:
4579                 break;
4580         case PACKET3_COPY_DATA:
4581                 if ((idx_value & 0xf00) == 0) {
4582                         reg = ib[idx + 3] * 4;
4583                         if (!si_vm_reg_valid(reg))
4584                                 return -EINVAL;
4585                 }
4586                 break;
4587         case PACKET3_WRITE_DATA:
4588                 if ((idx_value & 0xf00) == 0) {
4589                         start_reg = ib[idx + 1] * 4;
4590                         if (idx_value & 0x10000) {
4591                                 if (!si_vm_reg_valid(start_reg))
4592                                         return -EINVAL;
4593                         } else {
4594                                 for (i = 0; i < (pkt->count - 2); i++) {
4595                                         reg = start_reg + (4 * i);
4596                                         if (!si_vm_reg_valid(reg))
4597                                                 return -EINVAL;
4598                                 }
4599                         }
4600                 }
4601                 break;
4602         case PACKET3_COND_WRITE:
4603                 if (idx_value & 0x100) {
4604                         reg = ib[idx + 5] * 4;
4605                         if (!si_vm_reg_valid(reg))
4606                                 return -EINVAL;
4607                 }
4608                 break;
4609         case PACKET3_COPY_DW:
4610                 if (idx_value & 0x2) {
4611                         reg = ib[idx + 3] * 4;
4612                         if (!si_vm_reg_valid(reg))
4613                                 return -EINVAL;
4614                 }
4615                 break;
4616         case PACKET3_SET_CONFIG_REG:
4617                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4618                 end_reg = 4 * pkt->count + start_reg - 4;
4619                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4620                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4621                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4622                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4623                         return -EINVAL;
4624                 }
4625                 for (i = 0; i < pkt->count; i++) {
4626                         reg = start_reg + (4 * i);
4627                         if (!si_vm_reg_valid(reg))
4628                                 return -EINVAL;
4629                 }
4630                 break;
4631         case PACKET3_CP_DMA:
4632                 r = si_vm_packet3_cp_dma_check(ib, idx);
4633                 if (r)
4634                         return r;
4635                 break;
4636         default:
4637                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4638                 return -EINVAL;
4639         }
4640         return 0;
4641 }
4642
4643 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4644                                        u32 *ib, struct radeon_cs_packet *pkt)
4645 {
4646         int r;
4647         u32 idx = pkt->idx + 1;
4648         u32 idx_value = ib[idx];
4649         u32 start_reg, reg, i;
4650
4651         switch (pkt->opcode) {
4652         case PACKET3_NOP:
4653         case PACKET3_SET_BASE:
4654         case PACKET3_CLEAR_STATE:
4655         case PACKET3_DISPATCH_DIRECT:
4656         case PACKET3_DISPATCH_INDIRECT:
4657         case PACKET3_ALLOC_GDS:
4658         case PACKET3_WRITE_GDS_RAM:
4659         case PACKET3_ATOMIC_GDS:
4660         case PACKET3_ATOMIC:
4661         case PACKET3_OCCLUSION_QUERY:
4662         case PACKET3_SET_PREDICATION:
4663         case PACKET3_COND_EXEC:
4664         case PACKET3_PRED_EXEC:
4665         case PACKET3_CONTEXT_CONTROL:
4666         case PACKET3_STRMOUT_BUFFER_UPDATE:
4667         case PACKET3_WAIT_REG_MEM:
4668         case PACKET3_MEM_WRITE:
4669         case PACKET3_PFP_SYNC_ME:
4670         case PACKET3_SURFACE_SYNC:
4671         case PACKET3_EVENT_WRITE:
4672         case PACKET3_EVENT_WRITE_EOP:
4673         case PACKET3_EVENT_WRITE_EOS:
4674         case PACKET3_SET_CONTEXT_REG:
4675         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4676         case PACKET3_SET_SH_REG:
4677         case PACKET3_SET_SH_REG_OFFSET:
4678         case PACKET3_INCREMENT_DE_COUNTER:
4679         case PACKET3_WAIT_ON_CE_COUNTER:
4680         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4681         case PACKET3_ME_WRITE:
4682                 break;
4683         case PACKET3_COPY_DATA:
4684                 if ((idx_value & 0xf00) == 0) {
4685                         reg = ib[idx + 3] * 4;
4686                         if (!si_vm_reg_valid(reg))
4687                                 return -EINVAL;
4688                 }
4689                 break;
4690         case PACKET3_WRITE_DATA:
4691                 if ((idx_value & 0xf00) == 0) {
4692                         start_reg = ib[idx + 1] * 4;
4693                         if (idx_value & 0x10000) {
4694                                 if (!si_vm_reg_valid(start_reg))
4695                                         return -EINVAL;
4696                         } else {
4697                                 for (i = 0; i < (pkt->count - 2); i++) {
4698                                         reg = start_reg + (4 * i);
4699                                         if (!si_vm_reg_valid(reg))
4700                                                 return -EINVAL;
4701                                 }
4702                         }
4703                 }
4704                 break;
4705         case PACKET3_COND_WRITE:
4706                 if (idx_value & 0x100) {
4707                         reg = ib[idx + 5] * 4;
4708                         if (!si_vm_reg_valid(reg))
4709                                 return -EINVAL;
4710                 }
4711                 break;
4712         case PACKET3_COPY_DW:
4713                 if (idx_value & 0x2) {
4714                         reg = ib[idx + 3] * 4;
4715                         if (!si_vm_reg_valid(reg))
4716                                 return -EINVAL;
4717                 }
4718                 break;
4719         case PACKET3_CP_DMA:
4720                 r = si_vm_packet3_cp_dma_check(ib, idx);
4721                 if (r)
4722                         return r;
4723                 break;
4724         default:
4725                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4726                 return -EINVAL;
4727         }
4728         return 0;
4729 }
4730
4731 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4732 {
4733         int ret = 0;
4734         u32 idx = 0, i;
4735         struct radeon_cs_packet pkt;
4736
4737         do {
4738                 pkt.idx = idx;
4739                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4740                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4741                 pkt.one_reg_wr = 0;
4742                 switch (pkt.type) {
4743                 case RADEON_PACKET_TYPE0:
4744                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4745                         ret = -EINVAL;
4746                         break;
4747                 case RADEON_PACKET_TYPE2:
4748                         idx += 1;
4749                         break;
4750                 case RADEON_PACKET_TYPE3:
4751                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4752                         if (ib->is_const_ib)
4753                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4754                         else {
4755                                 switch (ib->ring) {
4756                                 case RADEON_RING_TYPE_GFX_INDEX:
4757                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4758                                         break;
4759                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4760                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4761                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4762                                         break;
4763                                 default:
4764                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4765                                         ret = -EINVAL;
4766                                         break;
4767                                 }
4768                         }
4769                         idx += pkt.count + 2;
4770                         break;
4771                 default:
4772                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4773                         ret = -EINVAL;
4774                         break;
4775                 }
4776                 if (ret) {
4777                         for (i = 0; i < ib->length_dw; i++) {
4778                                 if (i == idx)
4779                                         printk("\t0x%08x <---\n", ib->ptr[i]);
4780                                 else
4781                                         printk("\t0x%08x\n", ib->ptr[i]);
4782                         }
4783                         break;
4784                 }
4785         } while (idx < ib->length_dw);
4786
4787         return ret;
4788 }
4789
4790 /*
4791  * vm
4792  */
4793 int si_vm_init(struct radeon_device *rdev)
4794 {
4795         /* number of VMs */
4796         rdev->vm_manager.nvm = 16;
4797         /* base offset of vram pages */
4798         rdev->vm_manager.vram_base_offset = 0;
4799
4800         return 0;
4801 }
4802
/**
 * si_vm_fini - counterpart of si_vm_init()
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: nothing is done here.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4806
4807 /**
4808  * si_vm_decode_fault - print human readable fault info
4809  *
4810  * @rdev: radeon_device pointer
4811  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4812  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4813  *
4814  * Print human readable fault information (SI).
4815  */
4816 static void si_vm_decode_fault(struct radeon_device *rdev,
4817                                u32 status, u32 addr)
4818 {
4819         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4820         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4821         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4822         char *block;
4823
4824         if (rdev->family == CHIP_TAHITI) {
4825                 switch (mc_id) {
4826                 case 160:
4827                 case 144:
4828                 case 96:
4829                 case 80:
4830                 case 224:
4831                 case 208:
4832                 case 32:
4833                 case 16:
4834                         block = "CB";
4835                         break;
4836                 case 161:
4837                 case 145:
4838                 case 97:
4839                 case 81:
4840                 case 225:
4841                 case 209:
4842                 case 33:
4843                 case 17:
4844                         block = "CB_FMASK";
4845                         break;
4846                 case 162:
4847                 case 146:
4848                 case 98:
4849                 case 82:
4850                 case 226:
4851                 case 210:
4852                 case 34:
4853                 case 18:
4854                         block = "CB_CMASK";
4855                         break;
4856                 case 163:
4857                 case 147:
4858                 case 99:
4859                 case 83:
4860                 case 227:
4861                 case 211:
4862                 case 35:
4863                 case 19:
4864                         block = "CB_IMMED";
4865                         break;
4866                 case 164:
4867                 case 148:
4868                 case 100:
4869                 case 84:
4870                 case 228:
4871                 case 212:
4872                 case 36:
4873                 case 20:
4874                         block = "DB";
4875                         break;
4876                 case 165:
4877                 case 149:
4878                 case 101:
4879                 case 85:
4880                 case 229:
4881                 case 213:
4882                 case 37:
4883                 case 21:
4884                         block = "DB_HTILE";
4885                         break;
4886                 case 167:
4887                 case 151:
4888                 case 103:
4889                 case 87:
4890                 case 231:
4891                 case 215:
4892                 case 39:
4893                 case 23:
4894                         block = "DB_STEN";
4895                         break;
4896                 case 72:
4897                 case 68:
4898                 case 64:
4899                 case 8:
4900                 case 4:
4901                 case 0:
4902                 case 136:
4903                 case 132:
4904                 case 128:
4905                 case 200:
4906                 case 196:
4907                 case 192:
4908                         block = "TC";
4909                         break;
4910                 case 112:
4911                 case 48:
4912                         block = "CP";
4913                         break;
4914                 case 49:
4915                 case 177:
4916                 case 50:
4917                 case 178:
4918                         block = "SH";
4919                         break;
4920                 case 53:
4921                 case 190:
4922                         block = "VGT";
4923                         break;
4924                 case 117:
4925                         block = "IH";
4926                         break;
4927                 case 51:
4928                 case 115:
4929                         block = "RLC";
4930                         break;
4931                 case 119:
4932                 case 183:
4933                         block = "DMA0";
4934                         break;
4935                 case 61:
4936                         block = "DMA1";
4937                         break;
4938                 case 248:
4939                 case 120:
4940                         block = "HDP";
4941                         break;
4942                 default:
4943                         block = "unknown";
4944                         break;
4945                 }
4946         } else {
4947                 switch (mc_id) {
4948                 case 32:
4949                 case 16:
4950                 case 96:
4951                 case 80:
4952                 case 160:
4953                 case 144:
4954                 case 224:
4955                 case 208:
4956                         block = "CB";
4957                         break;
4958                 case 33:
4959                 case 17:
4960                 case 97:
4961                 case 81:
4962                 case 161:
4963                 case 145:
4964                 case 225:
4965                 case 209:
4966                         block = "CB_FMASK";
4967                         break;
4968                 case 34:
4969                 case 18:
4970                 case 98:
4971                 case 82:
4972                 case 162:
4973                 case 146:
4974                 case 226:
4975                 case 210:
4976                         block = "CB_CMASK";
4977                         break;
4978                 case 35:
4979                 case 19:
4980                 case 99:
4981                 case 83:
4982                 case 163:
4983                 case 147:
4984                 case 227:
4985                 case 211:
4986                         block = "CB_IMMED";
4987                         break;
4988                 case 36:
4989                 case 20:
4990                 case 100:
4991                 case 84:
4992                 case 164:
4993                 case 148:
4994                 case 228:
4995                 case 212:
4996                         block = "DB";
4997                         break;
4998                 case 37:
4999                 case 21:
5000                 case 101:
5001                 case 85:
5002                 case 165:
5003                 case 149:
5004                 case 229:
5005                 case 213:
5006                         block = "DB_HTILE";
5007                         break;
5008                 case 39:
5009                 case 23:
5010                 case 103:
5011                 case 87:
5012                 case 167:
5013                 case 151:
5014                 case 231:
5015                 case 215:
5016                         block = "DB_STEN";
5017                         break;
5018                 case 72:
5019                 case 68:
5020                 case 8:
5021                 case 4:
5022                 case 136:
5023                 case 132:
5024                 case 200:
5025                 case 196:
5026                         block = "TC";
5027                         break;
5028                 case 112:
5029                 case 48:
5030                         block = "CP";
5031                         break;
5032                 case 49:
5033                 case 177:
5034                 case 50:
5035                 case 178:
5036                         block = "SH";
5037                         break;
5038                 case 53:
5039                         block = "VGT";
5040                         break;
5041                 case 117:
5042                         block = "IH";
5043                         break;
5044                 case 51:
5045                 case 115:
5046                         block = "RLC";
5047                         break;
5048                 case 119:
5049                 case 183:
5050                         block = "DMA0";
5051                         break;
5052                 case 61:
5053                         block = "DMA1";
5054                         break;
5055                 case 248:
5056                 case 120:
5057                         block = "HDP";
5058                         break;
5059                 default:
5060                         block = "unknown";
5061                         break;
5062                 }
5063         }
5064
5065         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5066                protections, vmid, addr,
5067                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5068                block, mc_id);
5069 }
5070
5071 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5072                  unsigned vm_id, uint64_t pd_addr)
5073 {
5074         /* write new base address */
5075         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5076         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5077                                  WRITE_DATA_DST_SEL(0)));
5078
5079         if (vm_id < 8) {
5080                 radeon_ring_write(ring,
5081                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5082         } else {
5083                 radeon_ring_write(ring,
5084                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5085         }
5086         radeon_ring_write(ring, 0);
5087         radeon_ring_write(ring, pd_addr >> 12);
5088
5089         /* flush hdp cache */
5090         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5091         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5092                                  WRITE_DATA_DST_SEL(0)));
5093         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5094         radeon_ring_write(ring, 0);
5095         radeon_ring_write(ring, 0x1);
5096
5097         /* bits 0-15 are the VM contexts0-15 */
5098         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5099         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5100                                  WRITE_DATA_DST_SEL(0)));
5101         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5102         radeon_ring_write(ring, 0);
5103         radeon_ring_write(ring, 1 << vm_id);
5104
5105         /* wait for the invalidate to complete */
5106         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5107         radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5108                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5109         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5110         radeon_ring_write(ring, 0);
5111         radeon_ring_write(ring, 0); /* ref */
5112         radeon_ring_write(ring, 0); /* mask */
5113         radeon_ring_write(ring, 0x20); /* poll interval */
5114
5115         /* sync PFP to ME, otherwise we might get invalid PFP reads */
5116         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5117         radeon_ring_write(ring, 0x0);
5118 }
5119
5120 /*
5121  *  Power and clock gating
5122  */
5123 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5124 {
5125         int i;
5126
5127         for (i = 0; i < rdev->usec_timeout; i++) {
5128                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5129                         break;
5130                 udelay(1);
5131         }
5132
5133         for (i = 0; i < rdev->usec_timeout; i++) {
5134                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5135                         break;
5136                 udelay(1);
5137         }
5138 }
5139
5140 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5141                                          bool enable)
5142 {
5143         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5144         u32 mask;
5145         int i;
5146
5147         if (enable)
5148                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5149         else
5150                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5151         WREG32(CP_INT_CNTL_RING0, tmp);
5152
5153         if (!enable) {
5154                 /* read a gfx register */
5155                 tmp = RREG32(DB_DEPTH_INFO);
5156
5157                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5158                 for (i = 0; i < rdev->usec_timeout; i++) {
5159                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5160                                 break;
5161                         udelay(1);
5162                 }
5163         }
5164 }
5165
5166 static void si_set_uvd_dcm(struct radeon_device *rdev,
5167                            bool sw_mode)
5168 {
5169         u32 tmp, tmp2;
5170
5171         tmp = RREG32(UVD_CGC_CTRL);
5172         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5173         tmp |= DCM | CG_DT(1) | CLK_OD(4);
5174
5175         if (sw_mode) {
5176                 tmp &= ~0x7ffff800;
5177                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5178         } else {
5179                 tmp |= 0x7ffff800;
5180                 tmp2 = 0;
5181         }
5182
5183         WREG32(UVD_CGC_CTRL, tmp);
5184         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5185 }
5186
5187 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5188 {
5189         bool hw_mode = true;
5190
5191         if (hw_mode) {
5192                 si_set_uvd_dcm(rdev, false);
5193         } else {
5194                 u32 tmp = RREG32(UVD_CGC_CTRL);
5195                 tmp &= ~DCM;
5196                 WREG32(UVD_CGC_CTRL, tmp);
5197         }
5198 }
5199
5200 static u32 si_halt_rlc(struct radeon_device *rdev)
5201 {
5202         u32 data, orig;
5203
5204         orig = data = RREG32(RLC_CNTL);
5205
5206         if (data & RLC_ENABLE) {
5207                 data &= ~RLC_ENABLE;
5208                 WREG32(RLC_CNTL, data);
5209
5210                 si_wait_for_rlc_serdes(rdev);
5211         }
5212
5213         return orig;
5214 }
5215
5216 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5217 {
5218         u32 tmp;
5219
5220         tmp = RREG32(RLC_CNTL);
5221         if (tmp != rlc)
5222                 WREG32(RLC_CNTL, rlc);
5223 }
5224
5225 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5226 {
5227         u32 data, orig;
5228
5229         orig = data = RREG32(DMA_PG);
5230         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5231                 data |= PG_CNTL_ENABLE;
5232         else
5233                 data &= ~PG_CNTL_ENABLE;
5234         if (orig != data)
5235                 WREG32(DMA_PG, data);
5236 }
5237
5238 static void si_init_dma_pg(struct radeon_device *rdev)
5239 {
5240         u32 tmp;
5241
5242         WREG32(DMA_PGFSM_WRITE,  0x00002000);
5243         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5244
5245         for (tmp = 0; tmp < 5; tmp++)
5246                 WREG32(DMA_PGFSM_WRITE, 0);
5247 }
5248
5249 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5250                                bool enable)
5251 {
5252         u32 tmp;
5253
5254         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5255                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5256                 WREG32(RLC_TTOP_D, tmp);
5257
5258                 tmp = RREG32(RLC_PG_CNTL);
5259                 tmp |= GFX_PG_ENABLE;
5260                 WREG32(RLC_PG_CNTL, tmp);
5261
5262                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5263                 tmp |= AUTO_PG_EN;
5264                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5265         } else {
5266                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5267                 tmp &= ~AUTO_PG_EN;
5268                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5269
5270                 tmp = RREG32(DB_RENDER_CONTROL);
5271         }
5272 }
5273
5274 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5275 {
5276         u32 tmp;
5277
5278         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5279
5280         tmp = RREG32(RLC_PG_CNTL);
5281         tmp |= GFX_PG_SRC;
5282         WREG32(RLC_PG_CNTL, tmp);
5283
5284         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5285
5286         tmp = RREG32(RLC_AUTO_PG_CTRL);
5287
5288         tmp &= ~GRBM_REG_SGIT_MASK;
5289         tmp |= GRBM_REG_SGIT(0x700);
5290         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5291         WREG32(RLC_AUTO_PG_CTRL, tmp);
5292 }
5293
5294 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5295 {
5296         u32 mask = 0, tmp, tmp1;
5297         int i;
5298
5299         si_select_se_sh(rdev, se, sh);
5300         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5301         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5302         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5303
5304         tmp &= 0xffff0000;
5305
5306         tmp |= tmp1;
5307         tmp >>= 16;
5308
5309         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5310                 mask <<= 1;
5311                 mask |= 1;
5312         }
5313
5314         return (~tmp) & mask;
5315 }
5316
5317 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5318 {
5319         u32 i, j, k, active_cu_number = 0;
5320         u32 mask, counter, cu_bitmap;
5321         u32 tmp = 0;
5322
5323         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5324                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5325                         mask = 1;
5326                         cu_bitmap = 0;
5327                         counter  = 0;
5328                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5329                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5330                                         if (counter < 2)
5331                                                 cu_bitmap |= mask;
5332                                         counter++;
5333                                 }
5334                                 mask <<= 1;
5335                         }
5336
5337                         active_cu_number += counter;
5338                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5339                 }
5340         }
5341
5342         WREG32(RLC_PG_AO_CU_MASK, tmp);
5343
5344         tmp = RREG32(RLC_MAX_PG_CU);
5345         tmp &= ~MAX_PU_CU_MASK;
5346         tmp |= MAX_PU_CU(active_cu_number);
5347         WREG32(RLC_MAX_PG_CU, tmp);
5348 }
5349
5350 static void si_enable_cgcg(struct radeon_device *rdev,
5351                            bool enable)
5352 {
5353         u32 data, orig, tmp;
5354
5355         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5356
5357         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5358                 si_enable_gui_idle_interrupt(rdev, true);
5359
5360                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5361
5362                 tmp = si_halt_rlc(rdev);
5363
5364                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5365                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5366                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5367
5368                 si_wait_for_rlc_serdes(rdev);
5369
5370                 si_update_rlc(rdev, tmp);
5371
5372                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5373
5374                 data |= CGCG_EN | CGLS_EN;
5375         } else {
5376                 si_enable_gui_idle_interrupt(rdev, false);
5377
5378                 RREG32(CB_CGTT_SCLK_CTRL);
5379                 RREG32(CB_CGTT_SCLK_CTRL);
5380                 RREG32(CB_CGTT_SCLK_CTRL);
5381                 RREG32(CB_CGTT_SCLK_CTRL);
5382
5383                 data &= ~(CGCG_EN | CGLS_EN);
5384         }
5385
5386         if (orig != data)
5387                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5388 }
5389
5390 static void si_enable_mgcg(struct radeon_device *rdev,
5391                            bool enable)
5392 {
5393         u32 data, orig, tmp = 0;
5394
5395         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5396                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5397                 data = 0x96940200;
5398                 if (orig != data)
5399                         WREG32(CGTS_SM_CTRL_REG, data);
5400
5401                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5402                         orig = data = RREG32(CP_MEM_SLP_CNTL);
5403                         data |= CP_MEM_LS_EN;
5404                         if (orig != data)
5405                                 WREG32(CP_MEM_SLP_CNTL, data);
5406                 }
5407
5408                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5409                 data &= 0xffffffc0;
5410                 if (orig != data)
5411                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5412
5413                 tmp = si_halt_rlc(rdev);
5414
5415                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5416                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5417                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5418
5419                 si_update_rlc(rdev, tmp);
5420         } else {
5421                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5422                 data |= 0x00000003;
5423                 if (orig != data)
5424                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5425
5426                 data = RREG32(CP_MEM_SLP_CNTL);
5427                 if (data & CP_MEM_LS_EN) {
5428                         data &= ~CP_MEM_LS_EN;
5429                         WREG32(CP_MEM_SLP_CNTL, data);
5430                 }
5431                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5432                 data |= LS_OVERRIDE | OVERRIDE;
5433                 if (orig != data)
5434                         WREG32(CGTS_SM_CTRL_REG, data);
5435
5436                 tmp = si_halt_rlc(rdev);
5437
5438                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5439                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5440                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5441
5442                 si_update_rlc(rdev, tmp);
5443         }
5444 }
5445
5446 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5447                                bool enable)
5448 {
5449         u32 orig, data, tmp;
5450
5451         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5452                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5453                 tmp |= 0x3fff;
5454                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5455
5456                 orig = data = RREG32(UVD_CGC_CTRL);
5457                 data |= DCM;
5458                 if (orig != data)
5459                         WREG32(UVD_CGC_CTRL, data);
5460
5461                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5462                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5463         } else {
5464                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5465                 tmp &= ~0x3fff;
5466                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5467
5468                 orig = data = RREG32(UVD_CGC_CTRL);
5469                 data &= ~DCM;
5470                 if (orig != data)
5471                         WREG32(UVD_CGC_CTRL, data);
5472
5473                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5474                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5475         }
5476 }
5477
5478 static const u32 mc_cg_registers[] =
5479 {
5480         MC_HUB_MISC_HUB_CG,
5481         MC_HUB_MISC_SIP_CG,
5482         MC_HUB_MISC_VM_CG,
5483         MC_XPB_CLK_GAT,
5484         ATC_MISC_CG,
5485         MC_CITF_MISC_WR_CG,
5486         MC_CITF_MISC_RD_CG,
5487         MC_CITF_MISC_VM_CG,
5488         VM_L2_CG,
5489 };
5490
5491 static void si_enable_mc_ls(struct radeon_device *rdev,
5492                             bool enable)
5493 {
5494         int i;
5495         u32 orig, data;
5496
5497         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5498                 orig = data = RREG32(mc_cg_registers[i]);
5499                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5500                         data |= MC_LS_ENABLE;
5501                 else
5502                         data &= ~MC_LS_ENABLE;
5503                 if (data != orig)
5504                         WREG32(mc_cg_registers[i], data);
5505         }
5506 }
5507
5508 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5509                                bool enable)
5510 {
5511         int i;
5512         u32 orig, data;
5513
5514         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5515                 orig = data = RREG32(mc_cg_registers[i]);
5516                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5517                         data |= MC_CG_ENABLE;
5518                 else
5519                         data &= ~MC_CG_ENABLE;
5520                 if (data != orig)
5521                         WREG32(mc_cg_registers[i], data);
5522         }
5523 }
5524
5525 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5526                                bool enable)
5527 {
5528         u32 orig, data, offset;
5529         int i;
5530
5531         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5532                 for (i = 0; i < 2; i++) {
5533                         if (i == 0)
5534                                 offset = DMA0_REGISTER_OFFSET;
5535                         else
5536                                 offset = DMA1_REGISTER_OFFSET;
5537                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5538                         data &= ~MEM_POWER_OVERRIDE;
5539                         if (data != orig)
5540                                 WREG32(DMA_POWER_CNTL + offset, data);
5541                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5542                 }
5543         } else {
5544                 for (i = 0; i < 2; i++) {
5545                         if (i == 0)
5546                                 offset = DMA0_REGISTER_OFFSET;
5547                         else
5548                                 offset = DMA1_REGISTER_OFFSET;
5549                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5550                         data |= MEM_POWER_OVERRIDE;
5551                         if (data != orig)
5552                                 WREG32(DMA_POWER_CNTL + offset, data);
5553
5554                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5555                         data = 0xff000000;
5556                         if (data != orig)
5557                                 WREG32(DMA_CLK_CTRL + offset, data);
5558                 }
5559         }
5560 }
5561
5562 static void si_enable_bif_mgls(struct radeon_device *rdev,
5563                                bool enable)
5564 {
5565         u32 orig, data;
5566
5567         orig = data = RREG32_PCIE(PCIE_CNTL2);
5568
5569         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5570                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5571                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5572         else
5573                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5574                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5575
5576         if (orig != data)
5577                 WREG32_PCIE(PCIE_CNTL2, data);
5578 }
5579
5580 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5581                                bool enable)
5582 {
5583         u32 orig, data;
5584
5585         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5586
5587         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5588                 data &= ~CLOCK_GATING_DIS;
5589         else
5590                 data |= CLOCK_GATING_DIS;
5591
5592         if (orig != data)
5593                 WREG32(HDP_HOST_PATH_CNTL, data);
5594 }
5595
5596 static void si_enable_hdp_ls(struct radeon_device *rdev,
5597                              bool enable)
5598 {
5599         u32 orig, data;
5600
5601         orig = data = RREG32(HDP_MEM_POWER_LS);
5602
5603         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5604                 data |= HDP_LS_ENABLE;
5605         else
5606                 data &= ~HDP_LS_ENABLE;
5607
5608         if (orig != data)
5609                 WREG32(HDP_MEM_POWER_LS, data);
5610 }
5611
5612 static void si_update_cg(struct radeon_device *rdev,
5613                          u32 block, bool enable)
5614 {
5615         if (block & RADEON_CG_BLOCK_GFX) {
5616                 si_enable_gui_idle_interrupt(rdev, false);
5617                 /* order matters! */
5618                 if (enable) {
5619                         si_enable_mgcg(rdev, true);
5620                         si_enable_cgcg(rdev, true);
5621                 } else {
5622                         si_enable_cgcg(rdev, false);
5623                         si_enable_mgcg(rdev, false);
5624                 }
5625                 si_enable_gui_idle_interrupt(rdev, true);
5626         }
5627
5628         if (block & RADEON_CG_BLOCK_MC) {
5629                 si_enable_mc_mgcg(rdev, enable);
5630                 si_enable_mc_ls(rdev, enable);
5631         }
5632
5633         if (block & RADEON_CG_BLOCK_SDMA) {
5634                 si_enable_dma_mgcg(rdev, enable);
5635         }
5636
5637         if (block & RADEON_CG_BLOCK_BIF) {
5638                 si_enable_bif_mgls(rdev, enable);
5639         }
5640
5641         if (block & RADEON_CG_BLOCK_UVD) {
5642                 if (rdev->has_uvd) {
5643                         si_enable_uvd_mgcg(rdev, enable);
5644                 }
5645         }
5646
5647         if (block & RADEON_CG_BLOCK_HDP) {
5648                 si_enable_hdp_mgcg(rdev, enable);
5649                 si_enable_hdp_ls(rdev, enable);
5650         }
5651 }
5652
5653 static void si_init_cg(struct radeon_device *rdev)
5654 {
5655         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5656                             RADEON_CG_BLOCK_MC |
5657                             RADEON_CG_BLOCK_SDMA |
5658                             RADEON_CG_BLOCK_BIF |
5659                             RADEON_CG_BLOCK_HDP), true);
5660         if (rdev->has_uvd) {
5661                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5662                 si_init_uvd_internal_cg(rdev);
5663         }
5664 }
5665
5666 static void si_fini_cg(struct radeon_device *rdev)
5667 {
5668         if (rdev->has_uvd) {
5669                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5670         }
5671         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5672                             RADEON_CG_BLOCK_MC |
5673                             RADEON_CG_BLOCK_SDMA |
5674                             RADEON_CG_BLOCK_BIF |
5675                             RADEON_CG_BLOCK_HDP), false);
5676 }
5677
5678 u32 si_get_csb_size(struct radeon_device *rdev)
5679 {
5680         u32 count = 0;
5681         const struct cs_section_def *sect = NULL;
5682         const struct cs_extent_def *ext = NULL;
5683
5684         if (rdev->rlc.cs_data == NULL)
5685                 return 0;
5686
5687         /* begin clear state */
5688         count += 2;
5689         /* context control state */
5690         count += 3;
5691
5692         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5693                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5694                         if (sect->id == SECT_CONTEXT)
5695                                 count += 2 + ext->reg_count;
5696                         else
5697                                 return 0;
5698                 }
5699         }
5700         /* pa_sc_raster_config */
5701         count += 3;
5702         /* end clear state */
5703         count += 2;
5704         /* clear state */
5705         count += 2;
5706
5707         return count;
5708 }
5709
5710 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5711 {
5712         u32 count = 0, i;
5713         const struct cs_section_def *sect = NULL;
5714         const struct cs_extent_def *ext = NULL;
5715
5716         if (rdev->rlc.cs_data == NULL)
5717                 return;
5718         if (buffer == NULL)
5719                 return;
5720
5721         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5722         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5723
5724         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5725         buffer[count++] = cpu_to_le32(0x80000000);
5726         buffer[count++] = cpu_to_le32(0x80000000);
5727
5728         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5729                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5730                         if (sect->id == SECT_CONTEXT) {
5731                                 buffer[count++] =
5732                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5733                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5734                                 for (i = 0; i < ext->reg_count; i++)
5735                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
5736                         } else {
5737                                 return;
5738                         }
5739                 }
5740         }
5741
5742         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5743         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5744         switch (rdev->family) {
5745         case CHIP_TAHITI:
5746         case CHIP_PITCAIRN:
5747                 buffer[count++] = cpu_to_le32(0x2a00126a);
5748                 break;
5749         case CHIP_VERDE:
5750                 buffer[count++] = cpu_to_le32(0x0000124a);
5751                 break;
5752         case CHIP_OLAND:
5753                 buffer[count++] = cpu_to_le32(0x00000082);
5754                 break;
5755         case CHIP_HAINAN:
5756                 buffer[count++] = cpu_to_le32(0x00000000);
5757                 break;
5758         default:
5759                 buffer[count++] = cpu_to_le32(0x00000000);
5760                 break;
5761         }
5762
5763         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5764         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5765
5766         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5767         buffer[count++] = cpu_to_le32(0);
5768 }
5769
5770 static void si_init_pg(struct radeon_device *rdev)
5771 {
5772         if (rdev->pg_flags) {
5773                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5774                         si_init_dma_pg(rdev);
5775                 }
5776                 si_init_ao_cu_mask(rdev);
5777                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5778                         si_init_gfx_cgpg(rdev);
5779                 } else {
5780                         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5781                         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5782                 }
5783                 si_enable_dma_pg(rdev, true);
5784                 si_enable_gfx_cgpg(rdev, true);
5785         } else {
5786                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5787                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5788         }
5789 }
5790
5791 static void si_fini_pg(struct radeon_device *rdev)
5792 {
5793         if (rdev->pg_flags) {
5794                 si_enable_dma_pg(rdev, false);
5795                 si_enable_gfx_cgpg(rdev, false);
5796         }
5797 }
5798
5799 /*
5800  * RLC
5801  */
5802 void si_rlc_reset(struct radeon_device *rdev)
5803 {
5804         u32 tmp = RREG32(GRBM_SOFT_RESET);
5805
5806         tmp |= SOFT_RESET_RLC;
5807         WREG32(GRBM_SOFT_RESET, tmp);
5808         udelay(50);
5809         tmp &= ~SOFT_RESET_RLC;
5810         WREG32(GRBM_SOFT_RESET, tmp);
5811         udelay(50);
5812 }
5813
5814 static void si_rlc_stop(struct radeon_device *rdev)
5815 {
5816         WREG32(RLC_CNTL, 0);
5817
5818         si_enable_gui_idle_interrupt(rdev, false);
5819
5820         si_wait_for_rlc_serdes(rdev);
5821 }
5822
5823 static void si_rlc_start(struct radeon_device *rdev)
5824 {
5825         WREG32(RLC_CNTL, RLC_ENABLE);
5826
5827         si_enable_gui_idle_interrupt(rdev, true);
5828
5829         udelay(50);
5830 }
5831
5832 static bool si_lbpw_supported(struct radeon_device *rdev)
5833 {
5834         u32 tmp;
5835
5836         /* Enable LBPW only for DDR3 */
5837         tmp = RREG32(MC_SEQ_MISC0);
5838         if ((tmp & 0xF0000000) == 0xB0000000)
5839                 return true;
5840         return false;
5841 }
5842
5843 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5844 {
5845         u32 tmp;
5846
5847         tmp = RREG32(RLC_LB_CNTL);
5848         if (enable)
5849                 tmp |= LOAD_BALANCE_ENABLE;
5850         else
5851                 tmp &= ~LOAD_BALANCE_ENABLE;
5852         WREG32(RLC_LB_CNTL, tmp);
5853
5854         if (!enable) {
5855                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5856                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5857         }
5858 }
5859
5860 static int si_rlc_resume(struct radeon_device *rdev)
5861 {
5862         u32 i;
5863
5864         if (!rdev->rlc_fw)
5865                 return -EINVAL;
5866
5867         si_rlc_stop(rdev);
5868
5869         si_rlc_reset(rdev);
5870
5871         si_init_pg(rdev);
5872
5873         si_init_cg(rdev);
5874
5875         WREG32(RLC_RL_BASE, 0);
5876         WREG32(RLC_RL_SIZE, 0);
5877         WREG32(RLC_LB_CNTL, 0);
5878         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5879         WREG32(RLC_LB_CNTR_INIT, 0);
5880         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5881
5882         WREG32(RLC_MC_CNTL, 0);
5883         WREG32(RLC_UCODE_CNTL, 0);
5884
5885         if (rdev->new_fw) {
5886                 const struct rlc_firmware_header_v1_0 *hdr =
5887                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5888                 u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5889                 const __le32 *fw_data = (const __le32 *)
5890                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5891
5892                 radeon_ucode_print_rlc_hdr(&hdr->header);
5893
5894                 for (i = 0; i < fw_size; i++) {
5895                         WREG32(RLC_UCODE_ADDR, i);
5896                         WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5897                 }
5898         } else {
5899                 const __be32 *fw_data =
5900                         (const __be32 *)rdev->rlc_fw->data;
5901                 for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5902                         WREG32(RLC_UCODE_ADDR, i);
5903                         WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5904                 }
5905         }
5906         WREG32(RLC_UCODE_ADDR, 0);
5907
5908         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5909
5910         si_rlc_start(rdev);
5911
5912         return 0;
5913 }
5914
5915 static void si_enable_interrupts(struct radeon_device *rdev)
5916 {
5917         u32 ih_cntl = RREG32(IH_CNTL);
5918         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5919
5920         ih_cntl |= ENABLE_INTR;
5921         ih_rb_cntl |= IH_RB_ENABLE;
5922         WREG32(IH_CNTL, ih_cntl);
5923         WREG32(IH_RB_CNTL, ih_rb_cntl);
5924         rdev->ih.enabled = true;
5925 }
5926
5927 static void si_disable_interrupts(struct radeon_device *rdev)
5928 {
5929         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5930         u32 ih_cntl = RREG32(IH_CNTL);
5931
5932         ih_rb_cntl &= ~IH_RB_ENABLE;
5933         ih_cntl &= ~ENABLE_INTR;
5934         WREG32(IH_RB_CNTL, ih_rb_cntl);
5935         WREG32(IH_CNTL, ih_cntl);
5936         /* set rptr, wptr to 0 */
5937         WREG32(IH_RB_RPTR, 0);
5938         WREG32(IH_RB_WPTR, 0);
5939         rdev->ih.enabled = false;
5940         rdev->ih.rptr = 0;
5941 }
5942
5943 static void si_disable_interrupt_state(struct radeon_device *rdev)
5944 {
5945         int i;
5946         u32 tmp;
5947
5948         tmp = RREG32(CP_INT_CNTL_RING0) &
5949                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5950         WREG32(CP_INT_CNTL_RING0, tmp);
5951         WREG32(CP_INT_CNTL_RING1, 0);
5952         WREG32(CP_INT_CNTL_RING2, 0);
5953         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5954         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5955         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5956         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5957         WREG32(GRBM_INT_CNTL, 0);
5958         WREG32(SRBM_INT_CNTL, 0);
5959         for (i = 0; i < rdev->num_crtc; i++)
5960                 WREG32(INT_MASK + crtc_offsets[i], 0);
5961         for (i = 0; i < rdev->num_crtc; i++)
5962                 WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);
5963
5964         if (!ASIC_IS_NODCE(rdev)) {
5965                 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5966
5967                 for (i = 0; i < 6; i++)
5968                         WREG32_AND(DC_HPDx_INT_CONTROL(i),
5969                                    DC_HPDx_INT_POLARITY);
5970         }
5971 }
5972
5973 static int si_irq_init(struct radeon_device *rdev)
5974 {
5975         int ret = 0;
5976         int rb_bufsz;
5977         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5978
5979         /* allocate ring */
5980         ret = r600_ih_ring_alloc(rdev);
5981         if (ret)
5982                 return ret;
5983
5984         /* disable irqs */
5985         si_disable_interrupts(rdev);
5986
5987         /* init rlc */
5988         ret = si_rlc_resume(rdev);
5989         if (ret) {
5990                 r600_ih_ring_fini(rdev);
5991                 return ret;
5992         }
5993
5994         /* setup interrupt control */
5995         /* set dummy read address to dummy page address */
5996         WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
5997         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5998         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5999          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6000          */
6001         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6002         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6003         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6004         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6005
6006         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6007         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6008
6009         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6010                       IH_WPTR_OVERFLOW_CLEAR |
6011                       (rb_bufsz << 1));
6012
6013         if (rdev->wb.enabled)
6014                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6015
6016         /* set the writeback address whether it's enabled or not */
6017         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6018         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6019
6020         WREG32(IH_RB_CNTL, ih_rb_cntl);
6021
6022         /* set rptr, wptr to 0 */
6023         WREG32(IH_RB_RPTR, 0);
6024         WREG32(IH_RB_WPTR, 0);
6025
6026         /* Default settings for IH_CNTL (disabled at first) */
6027         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6028         /* RPTR_REARM only works if msi's are enabled */
6029         if (rdev->msi_enabled)
6030                 ih_cntl |= RPTR_REARM;
6031         WREG32(IH_CNTL, ih_cntl);
6032
6033         /* force the active interrupt state to all disabled */
6034         si_disable_interrupt_state(rdev);
6035
6036         pci_set_master(rdev->pdev);
6037
6038         /* enable irqs */
6039         si_enable_interrupts(rdev);
6040
6041         return ret;
6042 }
6043
6044 /* The order we write back each register here is important */
6045 int si_irq_set(struct radeon_device *rdev)
6046 {
6047         int i;
6048         u32 cp_int_cntl;
6049         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6050         u32 grbm_int_cntl = 0;
6051         u32 dma_cntl, dma_cntl1;
6052         u32 thermal_int = 0;
6053
6054         if (!rdev->irq.installed) {
6055                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6056                 return -EINVAL;
6057         }
6058         /* don't enable anything if the ih is disabled */
6059         if (!rdev->ih.enabled) {
6060                 si_disable_interrupts(rdev);
6061                 /* force the active interrupt state to all disabled */
6062                 si_disable_interrupt_state(rdev);
6063                 return 0;
6064         }
6065
6066         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6067                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6068
6069         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6070         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6071
6072         thermal_int = RREG32(CG_THERMAL_INT) &
6073                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6074
6075         /* enable CP interrupts on all rings */
6076         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6077                 DRM_DEBUG("si_irq_set: sw int gfx\n");
6078                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6079         }
6080         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6081                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6082                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6083         }
6084         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6085                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6086                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6087         }
6088         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6089                 DRM_DEBUG("si_irq_set: sw int dma\n");
6090                 dma_cntl |= TRAP_ENABLE;
6091         }
6092
6093         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6094                 DRM_DEBUG("si_irq_set: sw int dma1\n");
6095                 dma_cntl1 |= TRAP_ENABLE;
6096         }
6097
6098         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6099         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6100         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6101
6102         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6103         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6104
6105         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6106
6107         if (rdev->irq.dpm_thermal) {
6108                 DRM_DEBUG("dpm thermal\n");
6109                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6110         }
6111
6112         for (i = 0; i < rdev->num_crtc; i++) {
6113                 radeon_irq_kms_set_irq_n_enabled(
6114                     rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
6115                     rdev->irq.crtc_vblank_int[i] ||
6116                     atomic_read(&rdev->irq.pflip[i]), "vblank", i);
6117         }
6118
6119         for (i = 0; i < rdev->num_crtc; i++)
6120                 WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);
6121
6122         if (!ASIC_IS_NODCE(rdev)) {
6123                 for (i = 0; i < 6; i++) {
6124                         radeon_irq_kms_set_irq_n_enabled(
6125                             rdev, DC_HPDx_INT_CONTROL(i),
6126                             DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
6127                             rdev->irq.hpd[i], "HPD", i);
6128                 }
6129         }
6130
6131         WREG32(CG_THERMAL_INT, thermal_int);
6132
6133         /* posting read */
6134         RREG32(SRBM_STATUS);
6135
6136         return 0;
6137 }
6138
/*
 * si_irq_ack - latch and acknowledge pending display interrupts
 *
 * Reads the six display interrupt status registers (and the per-crtc
 * GRPH interrupt status for the crtcs that exist) into
 * rdev->irq.stat_regs.evergreen, then acknowledges every page flip,
 * vblank, vline, HPD and HPD RX interrupt found asserted in that
 * snapshot. Does nothing on ASICs without a display engine.
 *
 * The order we write back each register here is important
 */
static inline void si_irq_ack(struct radeon_device *rdev)
{
        int i, j;
        u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
        u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;

        if (ASIC_IS_NODCE(rdev))
                return;

        /* snapshot the status registers before acknowledging anything */
        for (i = 0; i < 6; i++) {
                disp_int[i] = RREG32(si_disp_int_status[i]);
                if (i < rdev->num_crtc)
                        grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
        }

        /* We write back each interrupt register in pairs of two */
        for (i = 0; i < rdev->num_crtc; i += 2) {
                /* page flips of the pair first ... */
                for (j = i; j < (i + 2); j++) {
                        if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
                                WREG32(GRPH_INT_STATUS + crtc_offsets[j],
                                       GRPH_PFLIP_INT_CLEAR);
                }

                /* ... then vblank and vline of the same pair */
                for (j = i; j < (i + 2); j++) {
                        if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
                                WREG32(VBLANK_STATUS + crtc_offsets[j],
                                       VBLANK_ACK);
                        if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
                                WREG32(VLINE_STATUS + crtc_offsets[j],
                                       VLINE_ACK);
                }
        }

        /* hotplug detect */
        for (i = 0; i < 6; i++) {
                if (disp_int[i] & DC_HPD1_INTERRUPT)
                        WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
        }

        /* hotplug detect RX */
        for (i = 0; i < 6; i++) {
                if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
                        WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
        }
}
6183
/*
 * si_irq_disable - turn interrupt delivery off and quiesce the sources
 *
 * Disables interrupts, waits 1 ms, acknowledges the display interrupts
 * still pending and then forces the interrupt state to all disabled.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
        si_disable_interrupts(rdev);
        /* Wait and acknowledge irq */
        mdelay(1);
        si_irq_ack(rdev);
        si_disable_interrupt_state(rdev);
}
6192
/*
 * si_irq_suspend - disable interrupts and stop the RLC
 *
 * Counterpart of si_irq_init() that keeps the IH ring allocated.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
        si_irq_disable(rdev);
        si_rlc_stop(rdev);
}
6198
/*
 * si_irq_fini - suspend interrupt handling and tear down the IH ring
 */
static void si_irq_fini(struct radeon_device *rdev)
{
        si_irq_suspend(rdev);
        r600_ih_ring_fini(rdev);
}
6204
6205 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6206 {
6207         u32 wptr, tmp;
6208
6209         if (rdev->wb.enabled)
6210                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6211         else
6212                 wptr = RREG32(IH_RB_WPTR);
6213
6214         if (wptr & RB_OVERFLOW) {
6215                 wptr &= ~RB_OVERFLOW;
6216                 /* When a ring buffer overflow happen start parsing interrupt
6217                  * from the last not overwritten vector (wptr + 16). Hopefully
6218                  * this should allow us to catchup.
6219                  */
6220                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6221                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6222                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6223                 tmp = RREG32(IH_RB_CNTL);
6224                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6225                 WREG32(IH_RB_CNTL, tmp);
6226         }
6227         return (wptr & rdev->ih.ptr_mask);
6228 }
6229
6230 /*        SI IV Ring
6231  * Each IV ring entry is 128 bits:
6232  * [7:0]    - interrupt source id
6233  * [31:8]   - reserved
6234  * [59:32]  - interrupt source data
6235  * [63:60]  - reserved
6236  * [71:64]  - RINGID
6237  * [79:72]  - VMID
6238  * [127:80] - reserved
6239  */
/*
 * si_irq_process - drain the IH ring and dispatch every pending vector
 *
 * Walks the ring from rdev->ih.rptr up to the current write pointer in
 * 16-byte steps, decoding the source id, source data and ring id of each
 * entry and handling it in the switch below. Hotplug, DP and thermal
 * events are only flagged inside the loop; their work items are
 * scheduled once the loop has finished.
 *
 * rdev->ih.lock serialises processing: if it is already held the call
 * returns immediately. After the lock is dropped the write pointer is
 * sampled again and processing restarts if new entries arrived.
 *
 * Returns IRQ_NONE if the IH is disabled, the device is shutting down or
 * the lock is already held; IRQ_HANDLED otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
        u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
        u32 crtc_idx, hpd_idx;
        u32 mask;
        u32 wptr;
        u32 rptr;
        u32 src_id, src_data, ring_id;
        u32 ring_index;
        bool queue_hotplug = false;
        bool queue_dp = false;
        bool queue_thermal = false;
        u32 status, addr;
        const char *event_name;

        if (!rdev->ih.enabled || rdev->shutdown)
                return IRQ_NONE;

        wptr = si_get_ih_wptr(rdev);

restart_ih:
        /* is somebody else already processing irqs? */
        if (atomic_xchg(&rdev->ih.lock, 1))
                return IRQ_NONE;

        rptr = rdev->ih.rptr;
        DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

        /* Order reading of wptr vs. reading of IH ring data */
        rmb();

        /* display interrupts: snapshot status into disp_int and ack them */
        si_irq_ack(rdev);

        while (rptr != wptr) {
                /* wptr/rptr are in bytes! */
                ring_index = rptr / 4;
                /* dword 0: source id, dword 1: source data, dword 2: ring id */
                src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
                src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
                ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

                switch (src_id) {
                case 1: /* D1 vblank/vline */
                case 2: /* D2 vblank/vline */
                case 3: /* D3 vblank/vline */
                case 4: /* D4 vblank/vline */
                case 5: /* D5 vblank/vline */
                case 6: /* D6 vblank/vline */
                        crtc_idx = src_id - 1;

                        if (src_data == 0) { /* vblank */
                                mask = LB_D1_VBLANK_INTERRUPT;
                                event_name = "vblank";

                                if (rdev->irq.crtc_vblank_int[crtc_idx]) {
                                        drm_handle_vblank(rdev->ddev, crtc_idx);
                                        rdev->pm.vblank_sync = true;
                                        wake_up(&rdev->irq.vblank_queue);
                                }
                                if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
                                        radeon_crtc_handle_vblank(rdev,
                                                                  crtc_idx);
                                }

                        } else if (src_data == 1) { /* vline */
                                mask = LB_D1_VLINE_INTERRUPT;
                                event_name = "vline";
                        } else {
                                DRM_DEBUG("Unhandled interrupt: %d %d\n",
                                          src_id, src_data);
                                break;
                        }

                        /* the snapshot taken by si_irq_ack() should show this event */
                        if (!(disp_int[crtc_idx] & mask)) {
                                DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
                                          crtc_idx + 1, event_name);
                        }

                        /* mark the event as consumed in the snapshot */
                        disp_int[crtc_idx] &= ~mask;
                        DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);

                        break;
                case 8: /* D1 page flip */
                case 10: /* D2 page flip */
                case 12: /* D3 page flip */
                case 14: /* D4 page flip */
                case 16: /* D5 page flip */
                case 18: /* D6 page flip */
                        /* source ids 8, 10, ... 18 map to crtc 0..5 */
                        DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
                        if (radeon_use_pflipirq > 0)
                                radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
                        break;
                case 42: /* HPD hotplug */
                        /* src_data 0..5: HPD1..6, src_data 6..11: HPD1..6 RX */
                        if (src_data <= 5) {
                                hpd_idx = src_data;
                                mask = DC_HPD1_INTERRUPT;
                                queue_hotplug = true;
                                event_name = "HPD";

                        } else if (src_data <= 11) {
                                hpd_idx = src_data - 6;
                                mask = DC_HPD1_RX_INTERRUPT;
                                queue_dp = true;
                                event_name = "HPD_RX";

                        } else {
                                DRM_DEBUG("Unhandled interrupt: %d %d\n",
                                          src_id, src_data);
                                break;
                        }

                        if (!(disp_int[hpd_idx] & mask))
                                DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

                        disp_int[hpd_idx] &= ~mask;
                        DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
                        break;
                case 96: /* SRBM read error: report it and ack */
                        DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
                        WREG32(SRBM_INT_ACK, 0x1);
                        break;
                case 124: /* UVD */
                        DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
                        radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
                        break;
                case 146: /* VM context1 protection fault */
                case 147:
                        addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
                        status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
                        /* reset addr and status */
                        WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
                        /* nothing latched: no fault details to report */
                        if (addr == 0x0 && status == 0x0)
                                break;
                        dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
                                addr);
                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
                                status);
                        si_vm_decode_fault(rdev, status, addr);
                        break;
                case 176: /* RINGID0 CP_INT */
                        radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
                        break;
                case 177: /* RINGID1 CP_INT */
                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
                        break;
                case 178: /* RINGID2 CP_INT */
                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
                        break;
                case 181: /* CP EOP event */
                        DRM_DEBUG("IH: CP EOP\n");
                        /* the ring id selects which CP ring signalled; others are ignored */
                        switch (ring_id) {
                        case 0:
                                radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
                                break;
                        case 1:
                                radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
                                break;
                        case 2:
                                radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
                                break;
                        }
                        break;
                case 224: /* DMA trap event */
                        DRM_DEBUG("IH: DMA trap\n");
                        radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
                        break;
                case 230: /* thermal low to high */
                        DRM_DEBUG("IH: thermal low to high\n");
                        rdev->pm.dpm.thermal.high_to_low = false;
                        queue_thermal = true;
                        break;
                case 231: /* thermal high to low */
                        DRM_DEBUG("IH: thermal high to low\n");
                        rdev->pm.dpm.thermal.high_to_low = true;
                        queue_thermal = true;
                        break;
                case 233: /* GUI IDLE */
                        DRM_DEBUG("IH: GUI idle\n");
                        break;
                case 244: /* DMA1 trap event */
                        DRM_DEBUG("IH: DMA1 trap\n");
                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
                        break;
                default:
                        DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                        break;
                }

                /* wptr/rptr are in bytes! */
                rptr += 16;
                rptr &= rdev->ih.ptr_mask;
                /* tell the hardware this entry has been consumed */
                WREG32(IH_RB_RPTR, rptr);
        }
        /* kick off the deferred work flagged while walking the ring */
        if (queue_dp)
                schedule_work(&rdev->dp_work);
        if (queue_hotplug)
                schedule_delayed_work(&rdev->hotplug_work, 0);
        if (queue_thermal && rdev->pm.dpm_enabled)
                schedule_work(&rdev->pm.dpm.thermal.work);
        /* publish the new read pointer before releasing the lock */
        rdev->ih.rptr = rptr;
        atomic_set(&rdev->ih.lock, 0);

        /* make sure wptr hasn't changed while processing */
        wptr = si_get_ih_wptr(rdev);
        if (wptr != rptr)
                goto restart_ih;

        return IRQ_HANDLED;
}
6450
6451 /*
6452  * startup/shutdown callbacks
6453  */
6454 static void si_uvd_init(struct radeon_device *rdev)
6455 {
6456         int r;
6457
6458         if (!rdev->has_uvd)
6459                 return;
6460
6461         r = radeon_uvd_init(rdev);
6462         if (r) {
6463                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6464                 /*
6465                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6466                  * to early fails uvd_v2_2_resume() and thus nothing happens
6467                  * there. So it is pointless to try to go through that code
6468                  * hence why we disable uvd here.
6469                  */
6470                 rdev->has_uvd = false;
6471                 return;
6472         }
6473         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6474         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6475 }
6476
6477 static void si_uvd_start(struct radeon_device *rdev)
6478 {
6479         int r;
6480
6481         if (!rdev->has_uvd)
6482                 return;
6483
6484         r = uvd_v2_2_resume(rdev);
6485         if (r) {
6486                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6487                 goto error;
6488         }
6489         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6490         if (r) {
6491                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6492                 goto error;
6493         }
6494         return;
6495
6496 error:
6497         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6498 }
6499
6500 static void si_uvd_resume(struct radeon_device *rdev)
6501 {
6502         struct radeon_ring *ring;
6503         int r;
6504
6505         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6506                 return;
6507
6508         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6509         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6510         if (r) {
6511                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6512                 return;
6513         }
6514         r = uvd_v1_0_init(rdev);
6515         if (r) {
6516                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6517                 return;
6518         }
6519 }
6520
6521 static void si_vce_init(struct radeon_device *rdev)
6522 {
6523         int r;
6524
6525         if (!rdev->has_vce)
6526                 return;
6527
6528         r = radeon_vce_init(rdev);
6529         if (r) {
6530                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6531                 /*
6532                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
6533                  * to early fails si_vce_start() and thus nothing happens
6534                  * there. So it is pointless to try to go through that code
6535                  * hence why we disable vce here.
6536                  */
6537                 rdev->has_vce = false;
6538                 return;
6539         }
6540         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6541         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6542         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6543         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6544 }
6545
6546 static void si_vce_start(struct radeon_device *rdev)
6547 {
6548         int r;
6549
6550         if (!rdev->has_vce)
6551                 return;
6552
6553         r = radeon_vce_resume(rdev);
6554         if (r) {
6555                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6556                 goto error;
6557         }
6558         r = vce_v1_0_resume(rdev);
6559         if (r) {
6560                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6561                 goto error;
6562         }
6563         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6564         if (r) {
6565                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6566                 goto error;
6567         }
6568         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6569         if (r) {
6570                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6571                 goto error;
6572         }
6573         return;
6574
6575 error:
6576         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6577         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6578 }
6579
6580 static void si_vce_resume(struct radeon_device *rdev)
6581 {
6582         struct radeon_ring *ring;
6583         int r;
6584
6585         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6586                 return;
6587
6588         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6589         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6590         if (r) {
6591                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6592                 return;
6593         }
6594         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6595         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6596         if (r) {
6597                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6598                 return;
6599         }
6600         r = vce_v1_0_init(rdev);
6601         if (r) {
6602                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6603                 return;
6604         }
6605 }
6606
/*
 * si_startup - bring the ASIC into a working state
 *
 * Shared by the init and resume paths (si_resume() calls it). In order:
 * PCIe link and ASPM setup, VRAM scratch, MC programming and microcode,
 * GART, GPU init, RLC buffers, writeback, fence drivers for every ring,
 * UVD/VCE start, interrupt setup, ring initialisation, CP microcode and
 * CP/DMA resume, UVD/VCE resume, IB pool, VM manager and audio.
 *
 * Returns 0 on success or the error code of the first step that failed.
 * NOTE(review): apart from radeon_irq_kms_fini() after an IH init
 * failure, nothing set up earlier is undone here on error; presumably
 * the caller is responsible for teardown - confirm.
 */
static int si_startup(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        int r;

        /* enable pcie gen2/3 link */
        si_pcie_gen3_enable(rdev);
        /* enable aspm */
        si_program_aspm(rdev);

        /* scratch needs to be initialized before MC */
        r = r600_vram_scratch_init(rdev);
        if (r)
                return r;

        si_mc_program(rdev);

        /* MC microcode is only loaded here when dpm is not enabled */
        if (!rdev->pm.dpm_enabled) {
                r = si_mc_load_microcode(rdev);
                if (r) {
                        DRM_ERROR("Failed to load MC firmware!\n");
                        return r;
                }
        }

        r = si_pcie_gart_enable(rdev);
        if (r)
                return r;
        si_gpu_init(rdev);

        /* allocate rlc buffers */
        /* only Verde gets a save/restore register list */
        if (rdev->family == CHIP_VERDE) {
                rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
                rdev->rlc.reg_list_size =
                        (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
        }
        rdev->rlc.cs_data = si_cs_data;
        r = sumo_rlc_init(rdev);
        if (r) {
                DRM_ERROR("Failed to init rlc BOs!\n");
                return r;
        }

        /* allocate wb buffer */
        r = radeon_wb_init(rdev);
        if (r)
                return r;

        /* start the fence driver of each CP and DMA ring */
        r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
                return r;
        }

        /* UVD/VCE failures are handled inside these helpers and are not fatal */
        si_uvd_start(rdev);
        si_vce_start(rdev);

        /* Enable IRQ */
        if (!rdev->irq.installed) {
                r = radeon_irq_kms_init(rdev);
                if (r)
                        return r;
        }

        r = si_irq_init(rdev);
        if (r) {
                DRM_ERROR("radeon: IH init failed (%d).\n", r);
                radeon_irq_kms_fini(rdev);
                return r;
        }
        si_irq_set(rdev);

        /* initialise the three CP rings, then the two DMA rings */
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             RADEON_CP_PACKET2);
        if (r)
                return r;

        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
                             RADEON_CP_PACKET2);
        if (r)
                return r;

        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
                             RADEON_CP_PACKET2);
        if (r)
                return r;

        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
                             DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
        if (r)
                return r;

        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
                             DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
        if (r)
                return r;

        /* load the CP microcode and get the CP and DMA engines running */
        r = si_cp_load_microcode(rdev);
        if (r)
                return r;
        r = si_cp_resume(rdev);
        if (r)
                return r;

        r = cayman_dma_resume(rdev);
        if (r)
                return r;

        /* as with start, UVD/VCE resume failures are not fatal */
        si_uvd_resume(rdev);
        si_vce_resume(rdev);

        r = radeon_ib_pool_init(rdev);
        if (r) {
                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
                return r;
        }

        r = radeon_vm_manager_init(rdev);
        if (r) {
                dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
                return r;
        }

        r = radeon_audio_init(rdev);
        if (r)
                return r;

        return 0;
}
6765
6766 int si_resume(struct radeon_device *rdev)
6767 {
6768         int r;
6769
6770         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6771          * posting will perform necessary task to bring back GPU into good
6772          * shape.
6773          */
6774         /* post card */
6775         atom_asic_init(rdev->mode_info.atom_context);
6776
6777         /* init golden registers */
6778         si_init_golden_registers(rdev);
6779
6780         if (rdev->pm.pm_method == PM_METHOD_DPM)
6781                 radeon_pm_resume(rdev);
6782
6783         rdev->accel_working = true;
6784         r = si_startup(rdev);
6785         if (r) {
6786                 DRM_ERROR("si startup failed on resume\n");
6787                 rdev->accel_working = false;
6788                 return r;
6789         }
6790
6791         return r;
6792
6793 }
6794
/*
 * si_suspend - quiesce the ASIC before a suspend
 *
 * Shuts the blocks down in sequence: power management, audio, VM
 * manager, CP, DMA, UVD and VCE (when present), power and clock gating,
 * interrupts, writeback and finally the GART.
 *
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
        radeon_pm_suspend(rdev);
        radeon_audio_fini(rdev);
        radeon_vm_manager_fini(rdev);
        /* stop the command processor and the DMA engines */
        si_cp_enable(rdev, false);
        cayman_dma_stop(rdev);
        if (rdev->has_uvd) {
                uvd_v1_0_fini(rdev);
                radeon_uvd_suspend(rdev);
        }
        if (rdev->has_vce)
                radeon_vce_suspend(rdev);
        si_fini_pg(rdev);
        si_fini_cg(rdev);
        /* interrupts off and RLC stopped; the IH ring stays allocated */
        si_irq_suspend(rdev);
        radeon_wb_disable(rdev);
        si_pcie_gart_disable(rdev);
        return 0;
}
6815
6816 /* Plan is to move initialization in that function and use
6817  * helper function so that radeon_device_init pretty much
6818  * do nothing more than calling asic specific function. This
6819  * should also allow to remove a bunch of callback function
6820  * like vram_info.
6821  */
6822 int si_init(struct radeon_device *rdev)
6823 {
6824         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6825         int r;
6826
6827         /* Read BIOS */
6828         if (!radeon_get_bios(rdev)) {
6829                 if (ASIC_IS_AVIVO(rdev))
6830                         return -EINVAL;
6831         }
6832         /* Must be an ATOMBIOS */
6833         if (!rdev->is_atom_bios) {
6834                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6835                 return -EINVAL;
6836         }
6837         r = radeon_atombios_init(rdev);
6838         if (r)
6839                 return r;
6840
6841         /* Post card if necessary */
6842         if (!radeon_card_posted(rdev)) {
6843                 if (!rdev->bios) {
6844                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6845                         return -EINVAL;
6846                 }
6847                 DRM_INFO("GPU not posted. posting now...\n");
6848                 atom_asic_init(rdev->mode_info.atom_context);
6849         }
6850         /* init golden registers */
6851         si_init_golden_registers(rdev);
6852         /* Initialize scratch registers */
6853         si_scratch_init(rdev);
6854         /* Initialize surface registers */
6855         radeon_surface_init(rdev);
6856         /* Initialize clocks */
6857         radeon_get_clock_info(rdev->ddev);
6858
6859         /* Fence driver */
6860         radeon_fence_driver_init(rdev);
6861
6862         /* initialize memory controller */
6863         r = si_mc_init(rdev);
6864         if (r)
6865                 return r;
6866         /* Memory manager */
6867         r = radeon_bo_init(rdev);
6868         if (r)
6869                 return r;
6870
6871         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6872             !rdev->rlc_fw || !rdev->mc_fw) {
6873                 r = si_init_microcode(rdev);
6874                 if (r) {
6875                         DRM_ERROR("Failed to load firmware!\n");
6876                         return r;
6877                 }
6878         }
6879
6880         /* Initialize power management */
6881         radeon_pm_init(rdev);
6882
6883         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6884         ring->ring_obj = NULL;
6885         r600_ring_init(rdev, ring, 1024 * 1024);
6886
6887         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6888         ring->ring_obj = NULL;
6889         r600_ring_init(rdev, ring, 1024 * 1024);
6890
6891         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6892         ring->ring_obj = NULL;
6893         r600_ring_init(rdev, ring, 1024 * 1024);
6894
6895         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6896         ring->ring_obj = NULL;
6897         r600_ring_init(rdev, ring, 64 * 1024);
6898
6899         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6900         ring->ring_obj = NULL;
6901         r600_ring_init(rdev, ring, 64 * 1024);
6902
6903         si_uvd_init(rdev);
6904         si_vce_init(rdev);
6905
6906         rdev->ih.ring_obj = NULL;
6907         r600_ih_ring_init(rdev, 64 * 1024);
6908
6909         r = r600_pcie_gart_init(rdev);
6910         if (r)
6911                 return r;
6912
6913         rdev->accel_working = true;
6914         r = si_startup(rdev);
6915         if (r) {
6916                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6917                 si_cp_fini(rdev);
6918                 cayman_dma_fini(rdev);
6919                 si_irq_fini(rdev);
6920                 sumo_rlc_fini(rdev);
6921                 radeon_wb_fini(rdev);
6922                 radeon_ib_pool_fini(rdev);
6923                 radeon_vm_manager_fini(rdev);
6924                 radeon_irq_kms_fini(rdev);
6925                 si_pcie_gart_fini(rdev);
6926                 rdev->accel_working = false;
6927         }
6928
6929         /* Don't start up if the MC ucode is missing.
6930          * The default clocks and voltages before the MC ucode
6931          * is loaded are not suffient for advanced operations.
6932          */
6933         if (!rdev->mc_fw) {
6934                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6935                 return -EINVAL;
6936         }
6937
6938         return 0;
6939 }
6940
6941 void si_fini(struct radeon_device *rdev)
6942 {
6943         radeon_pm_fini(rdev);
6944         si_cp_fini(rdev);
6945         cayman_dma_fini(rdev);
6946         si_fini_pg(rdev);
6947         si_fini_cg(rdev);
6948         si_irq_fini(rdev);
6949         sumo_rlc_fini(rdev);
6950         radeon_wb_fini(rdev);
6951         radeon_vm_manager_fini(rdev);
6952         radeon_ib_pool_fini(rdev);
6953         radeon_irq_kms_fini(rdev);
6954         if (rdev->has_uvd) {
6955                 uvd_v1_0_fini(rdev);
6956                 radeon_uvd_fini(rdev);
6957         }
6958         if (rdev->has_vce)
6959                 radeon_vce_fini(rdev);
6960         si_pcie_gart_fini(rdev);
6961         r600_vram_scratch_fini(rdev);
6962         radeon_gem_fini(rdev);
6963         radeon_fence_driver_fini(rdev);
6964         radeon_bo_fini(rdev);
6965         radeon_atombios_fini(rdev);
6966         kfree(rdev->bios);
6967         rdev->bios = NULL;
6968 }
6969
6970 /**
6971  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6972  *
6973  * @rdev: radeon_device pointer
6974  *
6975  * Fetches a GPU clock counter snapshot (SI).
6976  * Returns the 64 bit clock counter snapshot.
6977  */
6978 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6979 {
6980         uint64_t clock;
6981
6982         mutex_lock(&rdev->gpu_clock_mutex);
6983         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6984         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6985                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6986         mutex_unlock(&rdev->gpu_clock_mutex);
6987         return clock;
6988 }
6989
6990 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6991 {
6992         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6993         int r;
6994
6995         /* bypass vclk and dclk with bclk */
6996         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6997                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
6998                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6999
7000         /* put PLL in bypass mode */
7001         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7002
7003         if (!vclk || !dclk) {
7004                 /* keep the Bypass mode */
7005                 return 0;
7006         }
7007
7008         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7009                                           16384, 0x03FFFFFF, 0, 128, 5,
7010                                           &fb_div, &vclk_div, &dclk_div);
7011         if (r)
7012                 return r;
7013
7014         /* set RESET_ANTI_MUX to 0 */
7015         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7016
7017         /* set VCO_MODE to 1 */
7018         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7019
7020         /* disable sleep mode */
7021         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7022
7023         /* deassert UPLL_RESET */
7024         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7025
7026         mdelay(1);
7027
7028         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7029         if (r)
7030                 return r;
7031
7032         /* assert UPLL_RESET again */
7033         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7034
7035         /* disable spread spectrum. */
7036         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7037
7038         /* set feedback divider */
7039         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7040
7041         /* set ref divider to 0 */
7042         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7043
7044         if (fb_div < 307200)
7045                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7046         else
7047                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7048
7049         /* set PDIV_A and PDIV_B */
7050         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7051                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7052                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7053
7054         /* give the PLL some time to settle */
7055         mdelay(15);
7056
7057         /* deassert PLL_RESET */
7058         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7059
7060         mdelay(15);
7061
7062         /* switch from bypass mode to normal mode */
7063         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7064
7065         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7066         if (r)
7067                 return r;
7068
7069         /* switch VCLK and DCLK selection */
7070         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7071                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7072                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7073
7074         mdelay(100);
7075
7076         return 0;
7077 }
7078
7079 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7080 {
7081         struct pci_dev *root = rdev->pdev->bus->self;
7082         enum pci_bus_speed speed_cap;
7083         u32 speed_cntl, current_data_rate;
7084         int i;
7085         u16 tmp16;
7086
7087         if (pci_is_root_bus(rdev->pdev->bus))
7088                 return;
7089
7090         if (radeon_pcie_gen2 == 0)
7091                 return;
7092
7093         if (rdev->flags & RADEON_IS_IGP)
7094                 return;
7095
7096         if (!(rdev->flags & RADEON_IS_PCIE))
7097                 return;
7098
7099         speed_cap = pcie_get_speed_cap(root);
7100         if (speed_cap == PCI_SPEED_UNKNOWN)
7101                 return;
7102
7103         if ((speed_cap != PCIE_SPEED_8_0GT) &&
7104             (speed_cap != PCIE_SPEED_5_0GT))
7105                 return;
7106
7107         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7108         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7109                 LC_CURRENT_DATA_RATE_SHIFT;
7110         if (speed_cap == PCIE_SPEED_8_0GT) {
7111                 if (current_data_rate == 2) {
7112                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7113                         return;
7114                 }
7115                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7116         } else if (speed_cap == PCIE_SPEED_5_0GT) {
7117                 if (current_data_rate == 1) {
7118                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7119                         return;
7120                 }
7121                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7122         }
7123
7124         if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
7125                 return;
7126
7127         if (speed_cap == PCIE_SPEED_8_0GT) {
7128                 /* re-try equalization if gen3 is not already enabled */
7129                 if (current_data_rate != 2) {
7130                         u16 bridge_cfg, gpu_cfg;
7131                         u16 bridge_cfg2, gpu_cfg2;
7132                         u32 max_lw, current_lw, tmp;
7133
7134                         pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7135                                                   &bridge_cfg);
7136                         pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
7137                                                   &gpu_cfg);
7138
7139                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7140                         pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
7141
7142                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7143                         pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
7144                                                    tmp16);
7145
7146                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7147                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7148                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7149
7150                         if (current_lw < max_lw) {
7151                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7152                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7153                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7154                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7155                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7156                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7157                                 }
7158                         }
7159
7160                         for (i = 0; i < 10; i++) {
7161                                 /* check status */
7162                                 pcie_capability_read_word(rdev->pdev,
7163                                                           PCI_EXP_DEVSTA,
7164                                                           &tmp16);
7165                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7166                                         break;
7167
7168                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7169                                                           &bridge_cfg);
7170                                 pcie_capability_read_word(rdev->pdev,
7171                                                           PCI_EXP_LNKCTL,
7172                                                           &gpu_cfg);
7173
7174                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7175                                                           &bridge_cfg2);
7176                                 pcie_capability_read_word(rdev->pdev,
7177                                                           PCI_EXP_LNKCTL2,
7178                                                           &gpu_cfg2);
7179
7180                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7181                                 tmp |= LC_SET_QUIESCE;
7182                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7183
7184                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7185                                 tmp |= LC_REDO_EQ;
7186                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7187
7188                                 msleep(100);
7189
7190                                 /* linkctl */
7191                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7192                                                           &tmp16);
7193                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7194                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7195                                 pcie_capability_write_word(root,
7196                                                            PCI_EXP_LNKCTL,
7197                                                            tmp16);
7198
7199                                 pcie_capability_read_word(rdev->pdev,
7200                                                           PCI_EXP_LNKCTL,
7201                                                           &tmp16);
7202                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7203                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7204                                 pcie_capability_write_word(rdev->pdev,
7205                                                            PCI_EXP_LNKCTL,
7206                                                            tmp16);
7207
7208                                 /* linkctl2 */
7209                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7210                                                           &tmp16);
7211                                 tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7212                                            PCI_EXP_LNKCTL2_TX_MARGIN);
7213                                 tmp16 |= (bridge_cfg2 &
7214                                           (PCI_EXP_LNKCTL2_ENTER_COMP |
7215                                            PCI_EXP_LNKCTL2_TX_MARGIN));
7216                                 pcie_capability_write_word(root,
7217                                                            PCI_EXP_LNKCTL2,
7218                                                            tmp16);
7219
7220                                 pcie_capability_read_word(rdev->pdev,
7221                                                           PCI_EXP_LNKCTL2,
7222                                                           &tmp16);
7223                                 tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7224                                            PCI_EXP_LNKCTL2_TX_MARGIN);
7225                                 tmp16 |= (gpu_cfg2 &
7226                                           (PCI_EXP_LNKCTL2_ENTER_COMP |
7227                                            PCI_EXP_LNKCTL2_TX_MARGIN));
7228                                 pcie_capability_write_word(rdev->pdev,
7229                                                            PCI_EXP_LNKCTL2,
7230                                                            tmp16);
7231
7232                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7233                                 tmp &= ~LC_SET_QUIESCE;
7234                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7235                         }
7236                 }
7237         }
7238
7239         /* set the link speed */
7240         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7241         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7242         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7243
7244         pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
7245         tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
7246         if (speed_cap == PCIE_SPEED_8_0GT)
7247                 tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
7248         else if (speed_cap == PCIE_SPEED_5_0GT)
7249                 tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
7250         else
7251                 tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
7252         pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
7253
7254         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7255         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7256         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7257
7258         for (i = 0; i < rdev->usec_timeout; i++) {
7259                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7260                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7261                         break;
7262                 udelay(1);
7263         }
7264 }
7265
7266 static void si_program_aspm(struct radeon_device *rdev)
7267 {
7268         u32 data, orig;
7269         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7270         bool disable_clkreq = false;
7271
7272         if (radeon_aspm == 0)
7273                 return;
7274
7275         if (!(rdev->flags & RADEON_IS_PCIE))
7276                 return;
7277
7278         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7279         data &= ~LC_XMIT_N_FTS_MASK;
7280         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7281         if (orig != data)
7282                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7283
7284         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7285         data |= LC_GO_TO_RECOVERY;
7286         if (orig != data)
7287                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7288
7289         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7290         data |= P_IGNORE_EDB_ERR;
7291         if (orig != data)
7292                 WREG32_PCIE(PCIE_P_CNTL, data);
7293
7294         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7295         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7296         data |= LC_PMI_TO_L1_DIS;
7297         if (!disable_l0s)
7298                 data |= LC_L0S_INACTIVITY(7);
7299
7300         if (!disable_l1) {
7301                 data |= LC_L1_INACTIVITY(7);
7302                 data &= ~LC_PMI_TO_L1_DIS;
7303                 if (orig != data)
7304                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7305
7306                 if (!disable_plloff_in_l1) {
7307                         bool clk_req_support;
7308
7309                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7310                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7311                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7312                         if (orig != data)
7313                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7314
7315                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7316                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7317                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7318                         if (orig != data)
7319                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7320
7321                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7322                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7323                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7324                         if (orig != data)
7325                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7326
7327                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7328                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7329                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7330                         if (orig != data)
7331                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7332
7333                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7334                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7335                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7336                                 if (orig != data)
7337                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7338
7339                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7340                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7341                                 if (orig != data)
7342                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7343
7344                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7345                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7346                                 if (orig != data)
7347                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7348
7349                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7350                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7351                                 if (orig != data)
7352                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7353
7354                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7355                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7356                                 if (orig != data)
7357                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7358
7359                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7360                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7361                                 if (orig != data)
7362                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7363
7364                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7365                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7366                                 if (orig != data)
7367                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7368
7369                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7370                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7371                                 if (orig != data)
7372                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7373                         }
7374                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7375                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7376                         data |= LC_DYN_LANES_PWR_STATE(3);
7377                         if (orig != data)
7378                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7379
7380                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7381                         data &= ~LS2_EXIT_TIME_MASK;
7382                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7383                                 data |= LS2_EXIT_TIME(5);
7384                         if (orig != data)
7385                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7386
7387                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7388                         data &= ~LS2_EXIT_TIME_MASK;
7389                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7390                                 data |= LS2_EXIT_TIME(5);
7391                         if (orig != data)
7392                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7393
7394                         if (!disable_clkreq &&
7395                             !pci_is_root_bus(rdev->pdev->bus)) {
7396                                 struct pci_dev *root = rdev->pdev->bus->self;
7397                                 u32 lnkcap;
7398
7399                                 clk_req_support = false;
7400                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7401                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7402                                         clk_req_support = true;
7403                         } else {
7404                                 clk_req_support = false;
7405                         }
7406
7407                         if (clk_req_support) {
7408                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7409                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7410                                 if (orig != data)
7411                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7412
7413                                 orig = data = RREG32(THM_CLK_CNTL);
7414                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7415                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7416                                 if (orig != data)
7417                                         WREG32(THM_CLK_CNTL, data);
7418
7419                                 orig = data = RREG32(MISC_CLK_CNTL);
7420                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7421                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7422                                 if (orig != data)
7423                                         WREG32(MISC_CLK_CNTL, data);
7424
7425                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7426                                 data &= ~BCLK_AS_XCLK;
7427                                 if (orig != data)
7428                                         WREG32(CG_CLKPIN_CNTL, data);
7429
7430                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7431                                 data &= ~FORCE_BIF_REFCLK_EN;
7432                                 if (orig != data)
7433                                         WREG32(CG_CLKPIN_CNTL_2, data);
7434
7435                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7436                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7437                                 data |= MPLL_CLKOUT_SEL(4);
7438                                 if (orig != data)
7439                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7440
7441                                 orig = data = RREG32(SPLL_CNTL_MODE);
7442                                 data &= ~SPLL_REFCLK_SEL_MASK;
7443                                 if (orig != data)
7444                                         WREG32(SPLL_CNTL_MODE, data);
7445                         }
7446                 }
7447         } else {
7448                 if (orig != data)
7449                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7450         }
7451
7452         orig = data = RREG32_PCIE(PCIE_CNTL2);
7453         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7454         if (orig != data)
7455                 WREG32_PCIE(PCIE_CNTL2, data);
7456
7457         if (!disable_l0s) {
7458                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7459                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7460                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7461                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7462                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7463                                 data &= ~LC_L0S_INACTIVITY_MASK;
7464                                 if (orig != data)
7465                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7466                         }
7467                 }
7468         }
7469 }
7470
7471 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7472 {
7473         unsigned i;
7474
7475         /* make sure VCEPLL_CTLREQ is deasserted */
7476         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7477
7478         mdelay(10);
7479
7480         /* assert UPLL_CTLREQ */
7481         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7482
7483         /* wait for CTLACK and CTLACK2 to get asserted */
7484         for (i = 0; i < 100; ++i) {
7485                 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7486                 if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7487                         break;
7488                 mdelay(10);
7489         }
7490
7491         /* deassert UPLL_CTLREQ */
7492         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7493
7494         if (i == 100) {
7495                 DRM_ERROR("Timeout setting UVD clocks!\n");
7496                 return -ETIMEDOUT;
7497         }
7498
7499         return 0;
7500 }
7501
7502 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7503 {
7504         unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7505         int r;
7506
7507         /* bypass evclk and ecclk with bclk */
7508         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7509                      EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7510                      ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7511
7512         /* put PLL in bypass mode */
7513         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7514                      ~VCEPLL_BYPASS_EN_MASK);
7515
7516         if (!evclk || !ecclk) {
7517                 /* keep the Bypass mode, put PLL to sleep */
7518                 WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7519                              ~VCEPLL_SLEEP_MASK);
7520                 return 0;
7521         }
7522
7523         r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7524                                           16384, 0x03FFFFFF, 0, 128, 5,
7525                                           &fb_div, &evclk_div, &ecclk_div);
7526         if (r)
7527                 return r;
7528
7529         /* set RESET_ANTI_MUX to 0 */
7530         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7531
7532         /* set VCO_MODE to 1 */
7533         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7534                      ~VCEPLL_VCO_MODE_MASK);
7535
7536         /* toggle VCEPLL_SLEEP to 1 then back to 0 */
7537         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7538                      ~VCEPLL_SLEEP_MASK);
7539         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7540
7541         /* deassert VCEPLL_RESET */
7542         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7543
7544         mdelay(1);
7545
7546         r = si_vce_send_vcepll_ctlreq(rdev);
7547         if (r)
7548                 return r;
7549
7550         /* assert VCEPLL_RESET again */
7551         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7552
7553         /* disable spread spectrum. */
7554         WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7555
7556         /* set feedback divider */
7557         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7558
7559         /* set ref divider to 0 */
7560         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7561
7562         /* set PDIV_A and PDIV_B */
7563         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7564                      VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7565                      ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7566
7567         /* give the PLL some time to settle */
7568         mdelay(15);
7569
7570         /* deassert PLL_RESET */
7571         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7572
7573         mdelay(15);
7574
7575         /* switch from bypass mode to normal mode */
7576         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7577
7578         r = si_vce_send_vcepll_ctlreq(rdev);
7579         if (r)
7580                 return r;
7581
7582         /* switch VCLK and DCLK selection */
7583         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7584                      EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7585                      ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7586
7587         mdelay(100);
7588
7589         return 0;
7590 }