m68k: Migrate exception table users off module.h and onto extable.h
[platform/kernel/linux-exynos.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
/* Ring counts for GFX8: one graphics ring and eight compute rings. */
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_NUM_COMPUTE_RINGS 8
55
/*
 * Per-ASIC "golden" GB_ADDR_CONFIG values. Topaz and Carrizo share one value.
 * The same constants also appear as the mmGB_ADDR_CONFIG rows of the iceland,
 * cz, polaris11 and tonga *_golden_common_all tables in this file.
 */
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60
/*
 * Field-placement helpers: each macro shifts x left by the matching
 * GB_TILE_MODE0 or GB_MACROTILE_MODE0 *__SHIFT constant (defined in headers
 * outside this view). No masking is applied, so x is not range-checked here.
 */
61 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70
/*
 * Single-bit masks for RLC_CGTT_MGCG_OVERRIDE, occupying bits 0 through 5
 * (CPF, RLC, MGCG, CGCG, CGLS, GRBM in ascending bit order).
 */
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77
78 /* BPM SERDES command codes: SET is 1; CLE (presumably "clear" - confirm) is 0. */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81
82 /* BPM register addresses, numbered consecutively from 0. */
83 enum {
84         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
85         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
86         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
87         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
88         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
89         BPM_REG_FGCG_MAX            /* Evaluates to 5; presumably an end-of-list sentinel - confirm */
90 };
91
/*
 * NOTE(review): by its name, a length tied to the RLC "direct register list"
 * format. No use is visible in this part of the file - confirm what is being
 * counted (and in what units) before changing it.
 */
92 #define RLC_FormatDirectRegListLength        14
93
/*
 * Firmware images named by this file, declared via MODULE_FIRMWARE().
 * Each ASIC lists ce, pfp, me, mec and rlc images; Carrizo, Tonga, Fiji,
 * Polaris11 and Polaris10 additionally list mec2, while Stoney and Topaz
 * do not.
 */
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140
/*
 * GDS register offsets, one entry per VMID (16 entries, VMID0..VMID15).
 * Each entry lists that VMID's BASE, SIZE, GWS and OA registers in that order;
 * the member names come from struct amdgpu_gds_reg_offset, which is declared
 * outside this view.
 */
141 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
142 {
143         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
144         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
145         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
146         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
147         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
148         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
149         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
150         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
151         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
152         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
153         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
154         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
155         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
156         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
157         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
158         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
159 };
160
/*
 * Tonga golden register settings (a11).
 * Flat u32 list laid out three words per row: a register offset followed by
 * two 32-bit words - presumably an AND mask and an OR value; confirm against
 * amdgpu_program_register_sequence(), which consumes tables of this shape.
 */
161 static const u32 golden_settings_tonga_a11[] =
162 {
163         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
164         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
165         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
166         mmGB_GPU_ID, 0x0000000f, 0x00000000,
167         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
168         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
169         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
170         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
171         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
172         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
173         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
174         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
175         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
176         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
177         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
178 };
179
/*
 * Tonga common golden settings: GRBM_GFX_INDEX, raster config, GB_ADDR_CONFIG
 * and SPI CU resource-reserve registers. Three u32 words per row (register,
 * then two 32-bit words - presumably mask and value; confirm). Every row here
 * uses 0xffffffff as its middle word.
 */
180 static const u32 tonga_golden_common_all[] =
181 {
182         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
183         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
184         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
185         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
186         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
187         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
188         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
189         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
190 };
191
/*
 * Tonga clock-gating (MGCG/CGCG) init table; three u32 words per row
 * (register, then two 32-bit words - presumably mask and value; confirm).
 * Row order is significant to the reader: RLC_CGTT_MGCG_OVERRIDE is written
 * first, GRBM_GFX_INDEX is written before the CGTT_* group and again before
 * the per-CU CGTS_CU0..CU7 group, and CP_MEM_SLP_CNTL comes last.
 */
192 static const u32 tonga_mgcg_cgcg_init[] =
193 {
194         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
195         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
196         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
198         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
199         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
200         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
201         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
202         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
203         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
204         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
205         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
209         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
210         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
211         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
212         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
213         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
214         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
215         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
216         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
217         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
218         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
219         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
220         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
221         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
222         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
223         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
224         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
225         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
226         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
227         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
228         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
229         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
230         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
231         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
232         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
233         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
234         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
235         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
236         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
237         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
238         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
239         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
240         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
241         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
242         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
243         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
244         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
245         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
246         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
247         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
248         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
249         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
250         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
251         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
252         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
253         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
254         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
255         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
256         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
257         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
258         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
259         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
260         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
261         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
262         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
263         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
264         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
265         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
266         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
267         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
268         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
269 };
270
/*
 * Polaris11 golden register settings (a11).
 * Three u32 words per row: register offset, then two 32-bit words
 * (presumably AND mask and OR value - confirm against the code that
 * programs this table, which is outside this view).
 */
271 static const u32 golden_settings_polaris11_a11[] =
272 {
273         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
274         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
275         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
276         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
277         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
278         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
279         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
280         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
281         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
282         mmSQ_CONFIG, 0x07f80000, 0x07180000,
283         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
284         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
285         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
286         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
287         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
288         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
289 };
290
/*
 * Polaris11 common golden settings; three u32 words per row (register, then
 * two 32-bit words - presumably mask and value; confirm). Unlike the Tonga,
 * Polaris10 and Fiji common tables in this file, this one carries no
 * PA_SC_RASTER_CONFIG rows.
 */
291 static const u32 polaris11_golden_common_all[] =
292 {
293         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
294         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
295         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
296         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
297         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
298         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
299 };
300
/*
 * Polaris10 golden register settings (a11).
 * Three u32 words per row: register offset, then two 32-bit words
 * (presumably AND mask and OR value - confirm).
 * NOTE(review): the mmCB_HW_CONTROL_2 row has 0 as its middle word, unlike
 * every other row in this table, and there is no TCP_CHAN_STEER_LO row
 * (the Polaris11 table has one). Confirm both are intentional.
 */
301 static const u32 golden_settings_polaris10_a11[] =
302 {
303         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
304         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
305         mmCB_HW_CONTROL_2, 0, 0x0f000000,
306         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
307         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
308         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
309         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
310         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
311         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
312         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
313         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
314         mmSQ_CONFIG, 0x07f80000, 0x07180000,
315         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
316         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
317         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
318         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
319         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
320 };
321
/*
 * Polaris10 common golden settings; three u32 words per row (register, then
 * two 32-bit words - presumably mask and value; confirm). The register list
 * and values match tonga_golden_common_all in this file.
 */
322 static const u32 polaris10_golden_common_all[] =
323 {
324         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
325         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
326         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
327         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
328         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
329         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
330         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
331         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
332 };
333
/*
 * Fiji common golden settings; three u32 words per row (register, then two
 * 32-bit words - presumably mask and value; confirm). GRBM_GFX_INDEX is
 * written a second time immediately before the trailing SPI_CONFIG_CNTL_1
 * row, so the row order should be preserved.
 */
334 static const u32 fiji_golden_common_all[] =
335 {
336         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
337         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
338         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
339         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
340         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
341         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
342         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
343         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
344         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
345         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
346 };
347
/*
 * Fiji golden register settings (a10).
 * Three u32 words per row: register offset, then two 32-bit words
 * (presumably AND mask and OR value - confirm against the code that
 * programs this table, which is outside this view).
 */
348 static const u32 golden_settings_fiji_a10[] =
349 {
350         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
351         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
352         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
353         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
354         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
355         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
356         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
357         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
358         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
359         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
360         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
361 };
362
/*
 * Fiji clock-gating (MGCG/CGCG) init table, passed to
 * amdgpu_program_register_sequence() for CHIP_FIJI in
 * gfx_v8_0_init_golden_registers(). Three u32 words per row (register, then
 * two 32-bit words - presumably mask and value; confirm). Unlike the Tonga,
 * Iceland and Carrizo variants in this file, it has no per-CU CGTS_CUn rows.
 */
363 static const u32 fiji_mgcg_cgcg_init[] =
364 {
365         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
366         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
367         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
368         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
369         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
370         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
371         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
372         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
373         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
374         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
375         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
376         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
377         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
378         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
379         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
380         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
381         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
382         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
383         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
384         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
385         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
386         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
387         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
388         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
389         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
390         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
391         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
392         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
393         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
394         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
395         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
396         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
397         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
398         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
399         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
400 };
401
/*
 * Iceland golden register settings (a11), passed to
 * amdgpu_program_register_sequence() for CHIP_TOPAZ in
 * gfx_v8_0_init_golden_registers(). Three u32 words per row: register
 * offset, then two 32-bit words (presumably AND mask and OR value - confirm).
 */
402 static const u32 golden_settings_iceland_a11[] =
403 {
404         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
405         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
406         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
407         mmGB_GPU_ID, 0x0000000f, 0x00000000,
408         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
409         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
410         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
411         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
412         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
413         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
414         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
415         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
416         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
417         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
418         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
419 };
420
/*
 * Iceland common golden settings, passed to
 * amdgpu_program_register_sequence() for CHIP_TOPAZ in
 * gfx_v8_0_init_golden_registers(). Three u32 words per row (register, then
 * two 32-bit words - presumably mask and value; confirm).
 */
421 static const u32 iceland_golden_common_all[] =
422 {
423         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
424         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
425         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
426         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
427         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
428         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
429         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
430         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
431 };
432
/*
 * Iceland clock-gating (MGCG/CGCG) init table; the first sequence passed to
 * amdgpu_program_register_sequence() for CHIP_TOPAZ in
 * gfx_v8_0_init_golden_registers(). Three u32 words per row (register, then
 * two 32-bit words - presumably mask and value; confirm).
 * Covers per-CU CGTS rows for CU0..CU5 only and, unlike the Tonga, Fiji and
 * Carrizo variants in this file, has no CP_MEM_SLP_CNTL row.
 */
433 static const u32 iceland_mgcg_cgcg_init[] =
434 {
435         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
436         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
437         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
440         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
441         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
442         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
444         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
446         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
455         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
456         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
457         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
458         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
459         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
460         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
461         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
462         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
463         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
464         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
465         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
466         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
467         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
468         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
469         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
470         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
471         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
472         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
473         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
474         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
475         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
476         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
479         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
484         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
489         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
492         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
493         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
494         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
495         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
496         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
497         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
498         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
499 };
500
/*
 * Carrizo ("cz") golden register settings (a11).
 * Three u32 words per row: register offset, then two 32-bit words
 * (presumably AND mask and OR value - confirm against the code that
 * programs this table, which is outside this view).
 */
501 static const u32 cz_golden_settings_a11[] =
502 {
503         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
504         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
505         mmGB_GPU_ID, 0x0000000f, 0x00000000,
506         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
507         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
508         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
509         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
510         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
511         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
512         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
513 };
514
/*
 * Carrizo ("cz") common golden settings; three u32 words per row (register,
 * then two 32-bit words - presumably mask and value; confirm). The register
 * list and values match iceland_golden_common_all in this file.
 */
515 static const u32 cz_golden_common_all[] =
516 {
517         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
518         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
519         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
520         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
521         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
522         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
523         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
524         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
525 };
526
/*
 * Carrizo ("cz") clock-gating (MGCG/CGCG) init table; three u32 words per row
 * (register, then two 32-bit words - presumably mask and value; confirm).
 * Same register sequence as tonga_mgcg_cgcg_init (CU0..CU7 rows included);
 * the values differ for CGTT_CPF_CLK_CTRL (0x00000100 here) and
 * RLC_CGCG_CGLS_CTRL (0x0020003f here).
 */
527 static const u32 cz_mgcg_cgcg_init[] =
528 {
529         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
530         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
531         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
532         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
533         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
534         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
535         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
537         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
538         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
539         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
540         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
541         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
542         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
543         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
544         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
545         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
546         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
547         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
548         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
549         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
550         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
551         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
552         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
553         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
554         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
555         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
556         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
557         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
558         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
559         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
560         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
563         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
566         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
567         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
568         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
569         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
570         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
571         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
572         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
573         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
574         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
575         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
576         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
577         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
578         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
579         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
580         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
581         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
582         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
583         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
584         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
585         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
586         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
587         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
588         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
589         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
590         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
591         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
592         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
593         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
594         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
595         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
596         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
597         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
598         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
599         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
600         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
601         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
602         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
603         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
604 };
605
/*
 * Stoney golden register settings (a11).
 * Three u32 words per row: register offset, then two 32-bit words
 * (presumably AND mask and OR value - confirm against the code that
 * programs this table, which is outside this view).
 */
606 static const u32 stoney_golden_settings_a11[] =
607 {
608         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
609         mmGB_GPU_ID, 0x0000000f, 0x00000000,
610         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
611         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
612         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
613         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
614         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
615         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
616         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
617         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
618 };
619
/*
 * Stoney common golden settings; three u32 words per row (register, then two
 * 32-bit words - presumably mask and value; confirm). Its GB_ADDR_CONFIG
 * value (0x12010001) differs from every *_GB_ADDR_CONFIG_GOLDEN define at the
 * top of this file.
 */
620 static const u32 stoney_golden_common_all[] =
621 {
622         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
623         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
624         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
625         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
626         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
627         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
628         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
629         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
630 };
631
632 static const u32 stoney_mgcg_cgcg_init[] =
633 {
634         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
635         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
636         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
637         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
638         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
639         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
640 };
641
642 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
643 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
644 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
645 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
646 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
647 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
648
649 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
650 {
651         switch (adev->asic_type) {
652         case CHIP_TOPAZ:
653                 amdgpu_program_register_sequence(adev,
654                                                  iceland_mgcg_cgcg_init,
655                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
656                 amdgpu_program_register_sequence(adev,
657                                                  golden_settings_iceland_a11,
658                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
659                 amdgpu_program_register_sequence(adev,
660                                                  iceland_golden_common_all,
661                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
662                 break;
663         case CHIP_FIJI:
664                 amdgpu_program_register_sequence(adev,
665                                                  fiji_mgcg_cgcg_init,
666                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
667                 amdgpu_program_register_sequence(adev,
668                                                  golden_settings_fiji_a10,
669                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
670                 amdgpu_program_register_sequence(adev,
671                                                  fiji_golden_common_all,
672                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
673                 break;
674
675         case CHIP_TONGA:
676                 amdgpu_program_register_sequence(adev,
677                                                  tonga_mgcg_cgcg_init,
678                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
679                 amdgpu_program_register_sequence(adev,
680                                                  golden_settings_tonga_a11,
681                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
682                 amdgpu_program_register_sequence(adev,
683                                                  tonga_golden_common_all,
684                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
685                 break;
686         case CHIP_POLARIS11:
687                 amdgpu_program_register_sequence(adev,
688                                                  golden_settings_polaris11_a11,
689                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
690                 amdgpu_program_register_sequence(adev,
691                                                  polaris11_golden_common_all,
692                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
693                 break;
694         case CHIP_POLARIS10:
695                 amdgpu_program_register_sequence(adev,
696                                                  golden_settings_polaris10_a11,
697                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
698                 amdgpu_program_register_sequence(adev,
699                                                  polaris10_golden_common_all,
700                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
701                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
702                 if (adev->pdev->revision == 0xc7) {
703                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
704                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
705                 }
706                 break;
707         case CHIP_CARRIZO:
708                 amdgpu_program_register_sequence(adev,
709                                                  cz_mgcg_cgcg_init,
710                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
711                 amdgpu_program_register_sequence(adev,
712                                                  cz_golden_settings_a11,
713                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
714                 amdgpu_program_register_sequence(adev,
715                                                  cz_golden_common_all,
716                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
717                 break;
718         case CHIP_STONEY:
719                 amdgpu_program_register_sequence(adev,
720                                                  stoney_mgcg_cgcg_init,
721                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
722                 amdgpu_program_register_sequence(adev,
723                                                  stoney_golden_settings_a11,
724                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
725                 amdgpu_program_register_sequence(adev,
726                                                  stoney_golden_common_all,
727                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
728                 break;
729         default:
730                 break;
731         }
732 }
733
734 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
735 {
736         int i;
737
738         adev->gfx.scratch.num_reg = 7;
739         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
740         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
741                 adev->gfx.scratch.free[i] = true;
742                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
743         }
744 }
745
746 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
747 {
748         struct amdgpu_device *adev = ring->adev;
749         uint32_t scratch;
750         uint32_t tmp = 0;
751         unsigned i;
752         int r;
753
754         r = amdgpu_gfx_scratch_get(adev, &scratch);
755         if (r) {
756                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
757                 return r;
758         }
759         WREG32(scratch, 0xCAFEDEAD);
760         r = amdgpu_ring_alloc(ring, 3);
761         if (r) {
762                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
763                           ring->idx, r);
764                 amdgpu_gfx_scratch_free(adev, scratch);
765                 return r;
766         }
767         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
768         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
769         amdgpu_ring_write(ring, 0xDEADBEEF);
770         amdgpu_ring_commit(ring);
771
772         for (i = 0; i < adev->usec_timeout; i++) {
773                 tmp = RREG32(scratch);
774                 if (tmp == 0xDEADBEEF)
775                         break;
776                 DRM_UDELAY(1);
777         }
778         if (i < adev->usec_timeout) {
779                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
780                          ring->idx, i);
781         } else {
782                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
783                           ring->idx, scratch, tmp);
784                 r = -EINVAL;
785         }
786         amdgpu_gfx_scratch_free(adev, scratch);
787         return r;
788 }
789
790 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
791 {
792         struct amdgpu_device *adev = ring->adev;
793         struct amdgpu_ib ib;
794         struct fence *f = NULL;
795         uint32_t scratch;
796         uint32_t tmp = 0;
797         long r;
798
799         r = amdgpu_gfx_scratch_get(adev, &scratch);
800         if (r) {
801                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
802                 return r;
803         }
804         WREG32(scratch, 0xCAFEDEAD);
805         memset(&ib, 0, sizeof(ib));
806         r = amdgpu_ib_get(adev, NULL, 256, &ib);
807         if (r) {
808                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
809                 goto err1;
810         }
811         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
812         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
813         ib.ptr[2] = 0xDEADBEEF;
814         ib.length_dw = 3;
815
816         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
817         if (r)
818                 goto err2;
819
820         r = fence_wait_timeout(f, false, timeout);
821         if (r == 0) {
822                 DRM_ERROR("amdgpu: IB test timed out.\n");
823                 r = -ETIMEDOUT;
824                 goto err2;
825         } else if (r < 0) {
826                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
827                 goto err2;
828         }
829         tmp = RREG32(scratch);
830         if (tmp == 0xDEADBEEF) {
831                 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
832                 r = 0;
833         } else {
834                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
835                           scratch, tmp);
836                 r = -EINVAL;
837         }
838 err2:
839         amdgpu_ib_free(adev, &ib, NULL);
840         fence_put(f);
841 err1:
842         amdgpu_gfx_scratch_free(adev, scratch);
843         return r;
844 }
845
846
847 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
848         release_firmware(adev->gfx.pfp_fw);
849         adev->gfx.pfp_fw = NULL;
850         release_firmware(adev->gfx.me_fw);
851         adev->gfx.me_fw = NULL;
852         release_firmware(adev->gfx.ce_fw);
853         adev->gfx.ce_fw = NULL;
854         release_firmware(adev->gfx.rlc_fw);
855         adev->gfx.rlc_fw = NULL;
856         release_firmware(adev->gfx.mec_fw);
857         adev->gfx.mec_fw = NULL;
858         if ((adev->asic_type != CHIP_STONEY) &&
859             (adev->asic_type != CHIP_TOPAZ))
860                 release_firmware(adev->gfx.mec2_fw);
861         adev->gfx.mec2_fw = NULL;
862
863         kfree(adev->gfx.rlc.register_list_format);
864 }
865
866 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
867 {
868         const char *chip_name;
869         char fw_name[30];
870         int err;
871         struct amdgpu_firmware_info *info = NULL;
872         const struct common_firmware_header *header = NULL;
873         const struct gfx_firmware_header_v1_0 *cp_hdr;
874         const struct rlc_firmware_header_v2_0 *rlc_hdr;
875         unsigned int *tmp = NULL, i;
876
877         DRM_DEBUG("\n");
878
879         switch (adev->asic_type) {
880         case CHIP_TOPAZ:
881                 chip_name = "topaz";
882                 break;
883         case CHIP_TONGA:
884                 chip_name = "tonga";
885                 break;
886         case CHIP_CARRIZO:
887                 chip_name = "carrizo";
888                 break;
889         case CHIP_FIJI:
890                 chip_name = "fiji";
891                 break;
892         case CHIP_POLARIS11:
893                 chip_name = "polaris11";
894                 break;
895         case CHIP_POLARIS10:
896                 chip_name = "polaris10";
897                 break;
898         case CHIP_STONEY:
899                 chip_name = "stoney";
900                 break;
901         default:
902                 BUG();
903         }
904
905         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
906         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
907         if (err)
908                 goto out;
909         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
910         if (err)
911                 goto out;
912         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
913         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
914         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
915
916         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
917         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
918         if (err)
919                 goto out;
920         err = amdgpu_ucode_validate(adev->gfx.me_fw);
921         if (err)
922                 goto out;
923         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
924         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
925         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
926
927         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
928         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
929         if (err)
930                 goto out;
931         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
932         if (err)
933                 goto out;
934         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
935         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
936         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
937
938         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
939         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
940         if (err)
941                 goto out;
942         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
943         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
944         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
945         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
946
947         adev->gfx.rlc.save_and_restore_offset =
948                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
949         adev->gfx.rlc.clear_state_descriptor_offset =
950                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
951         adev->gfx.rlc.avail_scratch_ram_locations =
952                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
953         adev->gfx.rlc.reg_restore_list_size =
954                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
955         adev->gfx.rlc.reg_list_format_start =
956                         le32_to_cpu(rlc_hdr->reg_list_format_start);
957         adev->gfx.rlc.reg_list_format_separate_start =
958                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
959         adev->gfx.rlc.starting_offsets_start =
960                         le32_to_cpu(rlc_hdr->starting_offsets_start);
961         adev->gfx.rlc.reg_list_format_size_bytes =
962                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
963         adev->gfx.rlc.reg_list_size_bytes =
964                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
965
966         adev->gfx.rlc.register_list_format =
967                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
968                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
969
970         if (!adev->gfx.rlc.register_list_format) {
971                 err = -ENOMEM;
972                 goto out;
973         }
974
975         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
976                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
977         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
978                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
979
980         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
981
982         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
983                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
984         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
985                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
986
987         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
988         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
989         if (err)
990                 goto out;
991         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
992         if (err)
993                 goto out;
994         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
995         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
996         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
997
998         if ((adev->asic_type != CHIP_STONEY) &&
999             (adev->asic_type != CHIP_TOPAZ)) {
1000                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1001                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1002                 if (!err) {
1003                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1004                         if (err)
1005                                 goto out;
1006                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1007                                 adev->gfx.mec2_fw->data;
1008                         adev->gfx.mec2_fw_version =
1009                                 le32_to_cpu(cp_hdr->header.ucode_version);
1010                         adev->gfx.mec2_feature_version =
1011                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1012                 } else {
1013                         err = 0;
1014                         adev->gfx.mec2_fw = NULL;
1015                 }
1016         }
1017
1018         if (adev->firmware.smu_load) {
1019                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1020                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1021                 info->fw = adev->gfx.pfp_fw;
1022                 header = (const struct common_firmware_header *)info->fw->data;
1023                 adev->firmware.fw_size +=
1024                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1025
1026                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1027                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1028                 info->fw = adev->gfx.me_fw;
1029                 header = (const struct common_firmware_header *)info->fw->data;
1030                 adev->firmware.fw_size +=
1031                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1032
1033                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1034                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1035                 info->fw = adev->gfx.ce_fw;
1036                 header = (const struct common_firmware_header *)info->fw->data;
1037                 adev->firmware.fw_size +=
1038                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1039
1040                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1041                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1042                 info->fw = adev->gfx.rlc_fw;
1043                 header = (const struct common_firmware_header *)info->fw->data;
1044                 adev->firmware.fw_size +=
1045                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1046
1047                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1048                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1049                 info->fw = adev->gfx.mec_fw;
1050                 header = (const struct common_firmware_header *)info->fw->data;
1051                 adev->firmware.fw_size +=
1052                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053
1054                 if (adev->gfx.mec2_fw) {
1055                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1056                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1057                         info->fw = adev->gfx.mec2_fw;
1058                         header = (const struct common_firmware_header *)info->fw->data;
1059                         adev->firmware.fw_size +=
1060                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1061                 }
1062
1063         }
1064
1065 out:
1066         if (err) {
1067                 dev_err(adev->dev,
1068                         "gfx8: Failed to load firmware \"%s\"\n",
1069                         fw_name);
1070                 release_firmware(adev->gfx.pfp_fw);
1071                 adev->gfx.pfp_fw = NULL;
1072                 release_firmware(adev->gfx.me_fw);
1073                 adev->gfx.me_fw = NULL;
1074                 release_firmware(adev->gfx.ce_fw);
1075                 adev->gfx.ce_fw = NULL;
1076                 release_firmware(adev->gfx.rlc_fw);
1077                 adev->gfx.rlc_fw = NULL;
1078                 release_firmware(adev->gfx.mec_fw);
1079                 adev->gfx.mec_fw = NULL;
1080                 release_firmware(adev->gfx.mec2_fw);
1081                 adev->gfx.mec2_fw = NULL;
1082         }
1083         return err;
1084 }
1085
1086 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1087                                     volatile u32 *buffer)
1088 {
1089         u32 count = 0, i;
1090         const struct cs_section_def *sect = NULL;
1091         const struct cs_extent_def *ext = NULL;
1092
1093         if (adev->gfx.rlc.cs_data == NULL)
1094                 return;
1095         if (buffer == NULL)
1096                 return;
1097
1098         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1099         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1100
1101         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1102         buffer[count++] = cpu_to_le32(0x80000000);
1103         buffer[count++] = cpu_to_le32(0x80000000);
1104
1105         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1106                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1107                         if (sect->id == SECT_CONTEXT) {
1108                                 buffer[count++] =
1109                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1110                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1111                                                 PACKET3_SET_CONTEXT_REG_START);
1112                                 for (i = 0; i < ext->reg_count; i++)
1113                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1114                         } else {
1115                                 return;
1116                         }
1117                 }
1118         }
1119
1120         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1121         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1122                         PACKET3_SET_CONTEXT_REG_START);
1123         switch (adev->asic_type) {
1124         case CHIP_TONGA:
1125         case CHIP_POLARIS10:
1126                 buffer[count++] = cpu_to_le32(0x16000012);
1127                 buffer[count++] = cpu_to_le32(0x0000002A);
1128                 break;
1129         case CHIP_POLARIS11:
1130                 buffer[count++] = cpu_to_le32(0x16000012);
1131                 buffer[count++] = cpu_to_le32(0x00000000);
1132                 break;
1133         case CHIP_FIJI:
1134                 buffer[count++] = cpu_to_le32(0x3a00161a);
1135                 buffer[count++] = cpu_to_le32(0x0000002e);
1136                 break;
1137         case CHIP_TOPAZ:
1138         case CHIP_CARRIZO:
1139                 buffer[count++] = cpu_to_le32(0x00000002);
1140                 buffer[count++] = cpu_to_le32(0x00000000);
1141                 break;
1142         case CHIP_STONEY:
1143                 buffer[count++] = cpu_to_le32(0x00000000);
1144                 buffer[count++] = cpu_to_le32(0x00000000);
1145                 break;
1146         default:
1147                 buffer[count++] = cpu_to_le32(0x00000000);
1148                 buffer[count++] = cpu_to_le32(0x00000000);
1149                 break;
1150         }
1151
1152         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1153         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1154
1155         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1156         buffer[count++] = cpu_to_le32(0);
1157 }
1158
1159 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1160 {
1161         const __le32 *fw_data;
1162         volatile u32 *dst_ptr;
1163         int me, i, max_me = 4;
1164         u32 bo_offset = 0;
1165         u32 table_offset, table_size;
1166
1167         if (adev->asic_type == CHIP_CARRIZO)
1168                 max_me = 5;
1169
1170         /* write the cp table buffer */
1171         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1172         for (me = 0; me < max_me; me++) {
1173                 if (me == 0) {
1174                         const struct gfx_firmware_header_v1_0 *hdr =
1175                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1176                         fw_data = (const __le32 *)
1177                                 (adev->gfx.ce_fw->data +
1178                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1179                         table_offset = le32_to_cpu(hdr->jt_offset);
1180                         table_size = le32_to_cpu(hdr->jt_size);
1181                 } else if (me == 1) {
1182                         const struct gfx_firmware_header_v1_0 *hdr =
1183                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1184                         fw_data = (const __le32 *)
1185                                 (adev->gfx.pfp_fw->data +
1186                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1187                         table_offset = le32_to_cpu(hdr->jt_offset);
1188                         table_size = le32_to_cpu(hdr->jt_size);
1189                 } else if (me == 2) {
1190                         const struct gfx_firmware_header_v1_0 *hdr =
1191                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1192                         fw_data = (const __le32 *)
1193                                 (adev->gfx.me_fw->data +
1194                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1195                         table_offset = le32_to_cpu(hdr->jt_offset);
1196                         table_size = le32_to_cpu(hdr->jt_size);
1197                 } else if (me == 3) {
1198                         const struct gfx_firmware_header_v1_0 *hdr =
1199                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1200                         fw_data = (const __le32 *)
1201                                 (adev->gfx.mec_fw->data +
1202                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1203                         table_offset = le32_to_cpu(hdr->jt_offset);
1204                         table_size = le32_to_cpu(hdr->jt_size);
1205                 } else  if (me == 4) {
1206                         const struct gfx_firmware_header_v1_0 *hdr =
1207                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1208                         fw_data = (const __le32 *)
1209                                 (adev->gfx.mec2_fw->data +
1210                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1211                         table_offset = le32_to_cpu(hdr->jt_offset);
1212                         table_size = le32_to_cpu(hdr->jt_size);
1213                 }
1214
1215                 for (i = 0; i < table_size; i ++) {
1216                         dst_ptr[bo_offset + i] =
1217                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1218                 }
1219
1220                 bo_offset += table_size;
1221         }
1222 }
1223
1224 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1225 {
1226         int r;
1227
1228         /* clear state block */
1229         if (adev->gfx.rlc.clear_state_obj) {
1230                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1231                 if (unlikely(r != 0))
1232                         dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1233                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1234                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1235
1236                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1237                 adev->gfx.rlc.clear_state_obj = NULL;
1238         }
1239
1240         /* jump table block */
1241         if (adev->gfx.rlc.cp_table_obj) {
1242                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1243                 if (unlikely(r != 0))
1244                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1245                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1246                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1247
1248                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1249                 adev->gfx.rlc.cp_table_obj = NULL;
1250         }
1251 }
1252
1253 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1254 {
1255         volatile u32 *dst_ptr;
1256         u32 dws;
1257         const struct cs_section_def *cs_data;
1258         int r;
1259
1260         adev->gfx.rlc.cs_data = vi_cs_data;
1261
1262         cs_data = adev->gfx.rlc.cs_data;
1263
1264         if (cs_data) {
1265                 /* clear state block */
1266                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1267
1268                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1269                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1270                                              AMDGPU_GEM_DOMAIN_VRAM,
1271                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1272                                              NULL, NULL,
1273                                              &adev->gfx.rlc.clear_state_obj);
1274                         if (r) {
1275                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1276                                 gfx_v8_0_rlc_fini(adev);
1277                                 return r;
1278                         }
1279                 }
1280                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1281                 if (unlikely(r != 0)) {
1282                         gfx_v8_0_rlc_fini(adev);
1283                         return r;
1284                 }
1285                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1286                                   &adev->gfx.rlc.clear_state_gpu_addr);
1287                 if (r) {
1288                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1289                         dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
1290                         gfx_v8_0_rlc_fini(adev);
1291                         return r;
1292                 }
1293
1294                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1295                 if (r) {
1296                         dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
1297                         gfx_v8_0_rlc_fini(adev);
1298                         return r;
1299                 }
1300                 /* set up the cs buffer */
1301                 dst_ptr = adev->gfx.rlc.cs_ptr;
1302                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1303                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1304                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1305         }
1306
1307         if ((adev->asic_type == CHIP_CARRIZO) ||
1308             (adev->asic_type == CHIP_STONEY)) {
1309                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1310                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1311                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1312                                              AMDGPU_GEM_DOMAIN_VRAM,
1313                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1314                                              NULL, NULL,
1315                                              &adev->gfx.rlc.cp_table_obj);
1316                         if (r) {
1317                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1318                                 return r;
1319                         }
1320                 }
1321
1322                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1323                 if (unlikely(r != 0)) {
1324                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1325                         return r;
1326                 }
1327                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1328                                   &adev->gfx.rlc.cp_table_gpu_addr);
1329                 if (r) {
1330                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1331                         dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
1332                         return r;
1333                 }
1334                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1335                 if (r) {
1336                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1337                         return r;
1338                 }
1339
1340                 cz_init_cp_jump_table(adev);
1341
1342                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1343                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1344
1345         }
1346
1347         return 0;
1348 }
1349
1350 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1351 {
1352         int r;
1353
1354         if (adev->gfx.mec.hpd_eop_obj) {
1355                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1356                 if (unlikely(r != 0))
1357                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1358                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1359                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1360
1361                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1362                 adev->gfx.mec.hpd_eop_obj = NULL;
1363         }
1364 }
1365
1366 #define MEC_HPD_SIZE 2048
1367
1368 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1369 {
1370         int r;
1371         u32 *hpd;
1372
1373         /*
1374          * we assign only 1 pipe because all other pipes will
1375          * be handled by KFD
1376          */
1377         adev->gfx.mec.num_mec = 1;
1378         adev->gfx.mec.num_pipe = 1;
1379         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1380
1381         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1382                 r = amdgpu_bo_create(adev,
1383                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1384                                      PAGE_SIZE, true,
1385                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1386                                      &adev->gfx.mec.hpd_eop_obj);
1387                 if (r) {
1388                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1389                         return r;
1390                 }
1391         }
1392
1393         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1394         if (unlikely(r != 0)) {
1395                 gfx_v8_0_mec_fini(adev);
1396                 return r;
1397         }
1398         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1399                           &adev->gfx.mec.hpd_eop_gpu_addr);
1400         if (r) {
1401                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1402                 gfx_v8_0_mec_fini(adev);
1403                 return r;
1404         }
1405         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1406         if (r) {
1407                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1408                 gfx_v8_0_mec_fini(adev);
1409                 return r;
1410         }
1411
1412         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1413
1414         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1415         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1416
1417         return 0;
1418 }
1419
1420 static const u32 vgpr_init_compute_shader[] =
1421 {
1422         0x7e000209, 0x7e020208,
1423         0x7e040207, 0x7e060206,
1424         0x7e080205, 0x7e0a0204,
1425         0x7e0c0203, 0x7e0e0202,
1426         0x7e100201, 0x7e120200,
1427         0x7e140209, 0x7e160208,
1428         0x7e180207, 0x7e1a0206,
1429         0x7e1c0205, 0x7e1e0204,
1430         0x7e200203, 0x7e220202,
1431         0x7e240201, 0x7e260200,
1432         0x7e280209, 0x7e2a0208,
1433         0x7e2c0207, 0x7e2e0206,
1434         0x7e300205, 0x7e320204,
1435         0x7e340203, 0x7e360202,
1436         0x7e380201, 0x7e3a0200,
1437         0x7e3c0209, 0x7e3e0208,
1438         0x7e400207, 0x7e420206,
1439         0x7e440205, 0x7e460204,
1440         0x7e480203, 0x7e4a0202,
1441         0x7e4c0201, 0x7e4e0200,
1442         0x7e500209, 0x7e520208,
1443         0x7e540207, 0x7e560206,
1444         0x7e580205, 0x7e5a0204,
1445         0x7e5c0203, 0x7e5e0202,
1446         0x7e600201, 0x7e620200,
1447         0x7e640209, 0x7e660208,
1448         0x7e680207, 0x7e6a0206,
1449         0x7e6c0205, 0x7e6e0204,
1450         0x7e700203, 0x7e720202,
1451         0x7e740201, 0x7e760200,
1452         0x7e780209, 0x7e7a0208,
1453         0x7e7c0207, 0x7e7e0206,
1454         0xbf8a0000, 0xbf810000,
1455 };
1456
1457 static const u32 sgpr_init_compute_shader[] =
1458 {
1459         0xbe8a0100, 0xbe8c0102,
1460         0xbe8e0104, 0xbe900106,
1461         0xbe920108, 0xbe940100,
1462         0xbe960102, 0xbe980104,
1463         0xbe9a0106, 0xbe9c0108,
1464         0xbe9e0100, 0xbea00102,
1465         0xbea20104, 0xbea40106,
1466         0xbea60108, 0xbea80100,
1467         0xbeaa0102, 0xbeac0104,
1468         0xbeae0106, 0xbeb00108,
1469         0xbeb20100, 0xbeb40102,
1470         0xbeb60104, 0xbeb80106,
1471         0xbeba0108, 0xbebc0100,
1472         0xbebe0102, 0xbec00104,
1473         0xbec20106, 0xbec40108,
1474         0xbec60100, 0xbec80102,
1475         0xbee60004, 0xbee70005,
1476         0xbeea0006, 0xbeeb0007,
1477         0xbee80008, 0xbee90009,
1478         0xbefc0000, 0xbf8a0000,
1479         0xbf810000, 0x00000000,
1480 };
1481
1482 static const u32 vgpr_init_regs[] =
1483 {
1484         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1485         mmCOMPUTE_RESOURCE_LIMITS, 0,
1486         mmCOMPUTE_NUM_THREAD_X, 256*4,
1487         mmCOMPUTE_NUM_THREAD_Y, 1,
1488         mmCOMPUTE_NUM_THREAD_Z, 1,
1489         mmCOMPUTE_PGM_RSRC2, 20,
1490         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1491         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1492         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1493         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1494         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1495         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1496         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1497         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1498         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1499         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1500 };
1501
1502 static const u32 sgpr1_init_regs[] =
1503 {
1504         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1505         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1506         mmCOMPUTE_NUM_THREAD_X, 256*5,
1507         mmCOMPUTE_NUM_THREAD_Y, 1,
1508         mmCOMPUTE_NUM_THREAD_Z, 1,
1509         mmCOMPUTE_PGM_RSRC2, 20,
1510         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1511         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1512         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1513         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1514         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1515         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1516         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1517         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1518         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1519         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1520 };
1521
1522 static const u32 sgpr2_init_regs[] =
1523 {
1524         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1525         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1526         mmCOMPUTE_NUM_THREAD_X, 256*5,
1527         mmCOMPUTE_NUM_THREAD_Y, 1,
1528         mmCOMPUTE_NUM_THREAD_Z, 1,
1529         mmCOMPUTE_PGM_RSRC2, 20,
1530         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1531         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1532         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1533         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1534         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1535         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1536         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1537         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1538         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1539         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1540 };
1541
1542 static const u32 sec_ded_counter_registers[] =
1543 {
1544         mmCPC_EDC_ATC_CNT,
1545         mmCPC_EDC_SCRATCH_CNT,
1546         mmCPC_EDC_UCODE_CNT,
1547         mmCPF_EDC_ATC_CNT,
1548         mmCPF_EDC_ROQ_CNT,
1549         mmCPF_EDC_TAG_CNT,
1550         mmCPG_EDC_ATC_CNT,
1551         mmCPG_EDC_DMA_CNT,
1552         mmCPG_EDC_TAG_CNT,
1553         mmDC_EDC_CSINVOC_CNT,
1554         mmDC_EDC_RESTORE_CNT,
1555         mmDC_EDC_STATE_CNT,
1556         mmGDS_EDC_CNT,
1557         mmGDS_EDC_GRBM_CNT,
1558         mmGDS_EDC_OA_DED,
1559         mmSPI_EDC_CNT,
1560         mmSQC_ATC_EDC_GATCL1_CNT,
1561         mmSQC_EDC_CNT,
1562         mmSQ_EDC_DED_CNT,
1563         mmSQ_EDC_INFO,
1564         mmSQ_EDC_SEC_CNT,
1565         mmTCC_EDC_CNT,
1566         mmTCP_ATC_EDC_GATCL1_CNT,
1567         mmTCP_EDC_CNT,
1568         mmTD_EDC_CNT
1569 };
1570
1571 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1572 {
1573         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1574         struct amdgpu_ib ib;
1575         struct fence *f = NULL;
1576         int r, i;
1577         u32 tmp;
1578         unsigned total_size, vgpr_offset, sgpr_offset;
1579         u64 gpu_addr;
1580
1581         /* only supported on CZ */
1582         if (adev->asic_type != CHIP_CARRIZO)
1583                 return 0;
1584
1585         /* bail if the compute ring is not ready */
1586         if (!ring->ready)
1587                 return 0;
1588
1589         tmp = RREG32(mmGB_EDC_MODE);
1590         WREG32(mmGB_EDC_MODE, 0);
1591
1592         total_size =
1593                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1594         total_size +=
1595                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1596         total_size +=
1597                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1598         total_size = ALIGN(total_size, 256);
1599         vgpr_offset = total_size;
1600         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1601         sgpr_offset = total_size;
1602         total_size += sizeof(sgpr_init_compute_shader);
1603
1604         /* allocate an indirect buffer to put the commands in */
1605         memset(&ib, 0, sizeof(ib));
1606         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1607         if (r) {
1608                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1609                 return r;
1610         }
1611
1612         /* load the compute shaders */
1613         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1614                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1615
1616         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1617                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1618
1619         /* init the ib length to 0 */
1620         ib.length_dw = 0;
1621
1622         /* VGPR */
1623         /* write the register state for the compute dispatch */
1624         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1625                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1626                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1627                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1628         }
1629         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1630         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1631         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1632         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1633         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1634         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1635
1636         /* write dispatch packet */
1637         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1638         ib.ptr[ib.length_dw++] = 8; /* x */
1639         ib.ptr[ib.length_dw++] = 1; /* y */
1640         ib.ptr[ib.length_dw++] = 1; /* z */
1641         ib.ptr[ib.length_dw++] =
1642                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1643
1644         /* write CS partial flush packet */
1645         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1646         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1647
1648         /* SGPR1 */
1649         /* write the register state for the compute dispatch */
1650         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1651                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1652                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1653                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1654         }
1655         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1656         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1657         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1658         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1659         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1660         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1661
1662         /* write dispatch packet */
1663         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1664         ib.ptr[ib.length_dw++] = 8; /* x */
1665         ib.ptr[ib.length_dw++] = 1; /* y */
1666         ib.ptr[ib.length_dw++] = 1; /* z */
1667         ib.ptr[ib.length_dw++] =
1668                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1669
1670         /* write CS partial flush packet */
1671         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1672         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1673
1674         /* SGPR2 */
1675         /* write the register state for the compute dispatch */
1676         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1677                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1678                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1679                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1680         }
1681         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1682         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1683         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1684         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1685         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1686         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1687
1688         /* write dispatch packet */
1689         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1690         ib.ptr[ib.length_dw++] = 8; /* x */
1691         ib.ptr[ib.length_dw++] = 1; /* y */
1692         ib.ptr[ib.length_dw++] = 1; /* z */
1693         ib.ptr[ib.length_dw++] =
1694                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1695
1696         /* write CS partial flush packet */
1697         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1698         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1699
1700         /* shedule the ib on the ring */
1701         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1702         if (r) {
1703                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1704                 goto fail;
1705         }
1706
1707         /* wait for the GPU to finish processing the IB */
1708         r = fence_wait(f, false);
1709         if (r) {
1710                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1711                 goto fail;
1712         }
1713
1714         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1715         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1716         WREG32(mmGB_EDC_MODE, tmp);
1717
1718         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1719         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1720         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1721
1722
1723         /* read back registers to clear the counters */
1724         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1725                 RREG32(sec_ded_counter_registers[i]);
1726
1727 fail:
1728         amdgpu_ib_free(adev, &ib, NULL);
1729         fence_put(f);
1730
1731         return r;
1732 }
1733
1734 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1735 {
1736         u32 gb_addr_config;
1737         u32 mc_shared_chmap, mc_arb_ramcfg;
1738         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1739         u32 tmp;
1740         int ret;
1741
1742         switch (adev->asic_type) {
1743         case CHIP_TOPAZ:
1744                 adev->gfx.config.max_shader_engines = 1;
1745                 adev->gfx.config.max_tile_pipes = 2;
1746                 adev->gfx.config.max_cu_per_sh = 6;
1747                 adev->gfx.config.max_sh_per_se = 1;
1748                 adev->gfx.config.max_backends_per_se = 2;
1749                 adev->gfx.config.max_texture_channel_caches = 2;
1750                 adev->gfx.config.max_gprs = 256;
1751                 adev->gfx.config.max_gs_threads = 32;
1752                 adev->gfx.config.max_hw_contexts = 8;
1753
1754                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1755                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1756                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1757                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1758                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1759                 break;
1760         case CHIP_FIJI:
1761                 adev->gfx.config.max_shader_engines = 4;
1762                 adev->gfx.config.max_tile_pipes = 16;
1763                 adev->gfx.config.max_cu_per_sh = 16;
1764                 adev->gfx.config.max_sh_per_se = 1;
1765                 adev->gfx.config.max_backends_per_se = 4;
1766                 adev->gfx.config.max_texture_channel_caches = 16;
1767                 adev->gfx.config.max_gprs = 256;
1768                 adev->gfx.config.max_gs_threads = 32;
1769                 adev->gfx.config.max_hw_contexts = 8;
1770
1771                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1772                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1773                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1774                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1775                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1776                 break;
1777         case CHIP_POLARIS11:
1778                 ret = amdgpu_atombios_get_gfx_info(adev);
1779                 if (ret)
1780                         return ret;
1781                 adev->gfx.config.max_gprs = 256;
1782                 adev->gfx.config.max_gs_threads = 32;
1783                 adev->gfx.config.max_hw_contexts = 8;
1784
1785                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1786                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1787                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1788                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1789                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1790                 break;
1791         case CHIP_POLARIS10:
1792                 ret = amdgpu_atombios_get_gfx_info(adev);
1793                 if (ret)
1794                         return ret;
1795                 adev->gfx.config.max_gprs = 256;
1796                 adev->gfx.config.max_gs_threads = 32;
1797                 adev->gfx.config.max_hw_contexts = 8;
1798
1799                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1800                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1801                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1802                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1803                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1804                 break;
1805         case CHIP_TONGA:
1806                 adev->gfx.config.max_shader_engines = 4;
1807                 adev->gfx.config.max_tile_pipes = 8;
1808                 adev->gfx.config.max_cu_per_sh = 8;
1809                 adev->gfx.config.max_sh_per_se = 1;
1810                 adev->gfx.config.max_backends_per_se = 2;
1811                 adev->gfx.config.max_texture_channel_caches = 8;
1812                 adev->gfx.config.max_gprs = 256;
1813                 adev->gfx.config.max_gs_threads = 32;
1814                 adev->gfx.config.max_hw_contexts = 8;
1815
1816                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1817                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1818                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1819                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1820                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1821                 break;
1822         case CHIP_CARRIZO:
1823                 adev->gfx.config.max_shader_engines = 1;
1824                 adev->gfx.config.max_tile_pipes = 2;
1825                 adev->gfx.config.max_sh_per_se = 1;
1826                 adev->gfx.config.max_backends_per_se = 2;
1827
1828                 switch (adev->pdev->revision) {
1829                 case 0xc4:
1830                 case 0x84:
1831                 case 0xc8:
1832                 case 0xcc:
1833                 case 0xe1:
1834                 case 0xe3:
1835                         /* B10 */
1836                         adev->gfx.config.max_cu_per_sh = 8;
1837                         break;
1838                 case 0xc5:
1839                 case 0x81:
1840                 case 0x85:
1841                 case 0xc9:
1842                 case 0xcd:
1843                 case 0xe2:
1844                 case 0xe4:
1845                         /* B8 */
1846                         adev->gfx.config.max_cu_per_sh = 6;
1847                         break;
1848                 case 0xc6:
1849                 case 0xca:
1850                 case 0xce:
1851                 case 0x88:
1852                         /* B6 */
1853                         adev->gfx.config.max_cu_per_sh = 6;
1854                         break;
1855                 case 0xc7:
1856                 case 0x87:
1857                 case 0xcb:
1858                 case 0xe5:
1859                 case 0x89:
1860                 default:
1861                         /* B4 */
1862                         adev->gfx.config.max_cu_per_sh = 4;
1863                         break;
1864                 }
1865
1866                 adev->gfx.config.max_texture_channel_caches = 2;
1867                 adev->gfx.config.max_gprs = 256;
1868                 adev->gfx.config.max_gs_threads = 32;
1869                 adev->gfx.config.max_hw_contexts = 8;
1870
1871                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1872                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1873                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1874                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1875                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1876                 break;
1877         case CHIP_STONEY:
1878                 adev->gfx.config.max_shader_engines = 1;
1879                 adev->gfx.config.max_tile_pipes = 2;
1880                 adev->gfx.config.max_sh_per_se = 1;
1881                 adev->gfx.config.max_backends_per_se = 1;
1882
1883                 switch (adev->pdev->revision) {
1884                 case 0xc0:
1885                 case 0xc1:
1886                 case 0xc2:
1887                 case 0xc4:
1888                 case 0xc8:
1889                 case 0xc9:
1890                         adev->gfx.config.max_cu_per_sh = 3;
1891                         break;
1892                 case 0xd0:
1893                 case 0xd1:
1894                 case 0xd2:
1895                 default:
1896                         adev->gfx.config.max_cu_per_sh = 2;
1897                         break;
1898                 }
1899
1900                 adev->gfx.config.max_texture_channel_caches = 2;
1901                 adev->gfx.config.max_gprs = 256;
1902                 adev->gfx.config.max_gs_threads = 16;
1903                 adev->gfx.config.max_hw_contexts = 8;
1904
1905                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1906                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1907                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1908                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1909                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1910                 break;
1911         default:
1912                 adev->gfx.config.max_shader_engines = 2;
1913                 adev->gfx.config.max_tile_pipes = 4;
1914                 adev->gfx.config.max_cu_per_sh = 2;
1915                 adev->gfx.config.max_sh_per_se = 1;
1916                 adev->gfx.config.max_backends_per_se = 2;
1917                 adev->gfx.config.max_texture_channel_caches = 4;
1918                 adev->gfx.config.max_gprs = 256;
1919                 adev->gfx.config.max_gs_threads = 32;
1920                 adev->gfx.config.max_hw_contexts = 8;
1921
1922                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1923                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1924                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1925                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1926                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1927                 break;
1928         }
1929
1930         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1931         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1932         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1933
1934         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1935         adev->gfx.config.mem_max_burst_length_bytes = 256;
1936         if (adev->flags & AMD_IS_APU) {
1937                 /* Get memory bank mapping mode. */
1938                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1939                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1940                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1941
1942                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1943                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1944                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1945
1946                 /* Validate settings in case only one DIMM installed. */
1947                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1948                         dimm00_addr_map = 0;
1949                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1950                         dimm01_addr_map = 0;
1951                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1952                         dimm10_addr_map = 0;
1953                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1954                         dimm11_addr_map = 0;
1955
1956                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1957                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1958                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1959                         adev->gfx.config.mem_row_size_in_kb = 2;
1960                 else
1961                         adev->gfx.config.mem_row_size_in_kb = 1;
1962         } else {
1963                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1964                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1965                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1966                         adev->gfx.config.mem_row_size_in_kb = 4;
1967         }
1968
1969         adev->gfx.config.shader_engine_tile_size = 32;
1970         adev->gfx.config.num_gpus = 1;
1971         adev->gfx.config.multi_gpu_tile_size = 64;
1972
1973         /* fix up row size */
1974         switch (adev->gfx.config.mem_row_size_in_kb) {
1975         case 1:
1976         default:
1977                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1978                 break;
1979         case 2:
1980                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1981                 break;
1982         case 4:
1983                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1984                 break;
1985         }
1986         adev->gfx.config.gb_addr_config = gb_addr_config;
1987
1988         return 0;
1989 }
1990
1991 static int gfx_v8_0_sw_init(void *handle)
1992 {
1993         int i, r;
1994         struct amdgpu_ring *ring;
1995         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1996
1997         /* EOP Event */
1998         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1999         if (r)
2000                 return r;
2001
2002         /* Privileged reg */
2003         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2004         if (r)
2005                 return r;
2006
2007         /* Privileged inst */
2008         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2009         if (r)
2010                 return r;
2011
2012         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2013
2014         gfx_v8_0_scratch_init(adev);
2015
2016         r = gfx_v8_0_init_microcode(adev);
2017         if (r) {
2018                 DRM_ERROR("Failed to load gfx firmware!\n");
2019                 return r;
2020         }
2021
2022         r = gfx_v8_0_rlc_init(adev);
2023         if (r) {
2024                 DRM_ERROR("Failed to init rlc BOs!\n");
2025                 return r;
2026         }
2027
2028         r = gfx_v8_0_mec_init(adev);
2029         if (r) {
2030                 DRM_ERROR("Failed to init MEC BOs!\n");
2031                 return r;
2032         }
2033
2034         /* set up the gfx ring */
2035         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2036                 ring = &adev->gfx.gfx_ring[i];
2037                 ring->ring_obj = NULL;
2038                 sprintf(ring->name, "gfx");
2039                 /* no gfx doorbells on iceland */
2040                 if (adev->asic_type != CHIP_TOPAZ) {
2041                         ring->use_doorbell = true;
2042                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2043                 }
2044
2045                 r = amdgpu_ring_init(adev, ring, 1024,
2046                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2047                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2048                                      AMDGPU_RING_TYPE_GFX);
2049                 if (r)
2050                         return r;
2051         }
2052
2053         /* set up the compute queues */
2054         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2055                 unsigned irq_type;
2056
2057                 /* max 32 queues per MEC */
2058                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2059                         DRM_ERROR("Too many (%d) compute rings!\n", i);
2060                         break;
2061                 }
2062                 ring = &adev->gfx.compute_ring[i];
2063                 ring->ring_obj = NULL;
2064                 ring->use_doorbell = true;
2065                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2066                 ring->me = 1; /* first MEC */
2067                 ring->pipe = i / 8;
2068                 ring->queue = i % 8;
2069                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2070                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2071                 /* type-2 packets are deprecated on MEC, use type-3 instead */
2072                 r = amdgpu_ring_init(adev, ring, 1024,
2073                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2074                                      &adev->gfx.eop_irq, irq_type,
2075                                      AMDGPU_RING_TYPE_COMPUTE);
2076                 if (r)
2077                         return r;
2078         }
2079
2080         /* reserve GDS, GWS and OA resource for gfx */
2081         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
2082                         PAGE_SIZE, true,
2083                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
2084                         NULL, &adev->gds.gds_gfx_bo);
2085         if (r)
2086                 return r;
2087
2088         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
2089                 PAGE_SIZE, true,
2090                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
2091                 NULL, &adev->gds.gws_gfx_bo);
2092         if (r)
2093                 return r;
2094
2095         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
2096                         PAGE_SIZE, true,
2097                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
2098                         NULL, &adev->gds.oa_gfx_bo);
2099         if (r)
2100                 return r;
2101
2102         adev->gfx.ce_ram_size = 0x8000;
2103
2104         r = gfx_v8_0_gpu_early_init(adev);
2105         if (r)
2106                 return r;
2107
2108         return 0;
2109 }
2110
2111 static int gfx_v8_0_sw_fini(void *handle)
2112 {
2113         int i;
2114         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2115
2116         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2117         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2118         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2119
2120         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2121                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2122         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2123                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2124
2125         gfx_v8_0_mec_fini(adev);
2126
2127         gfx_v8_0_rlc_fini(adev);
2128
2129         gfx_v8_0_free_microcode(adev);
2130
2131         return 0;
2132 }
2133
2134 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2135 {
2136         uint32_t *modearray, *mod2array;
2137         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2138         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2139         u32 reg_offset;
2140
2141         modearray = adev->gfx.config.tile_mode_array;
2142         mod2array = adev->gfx.config.macrotile_mode_array;
2143
2144         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2145                 modearray[reg_offset] = 0;
2146
2147         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2148                 mod2array[reg_offset] = 0;
2149
2150         switch (adev->asic_type) {
2151         case CHIP_TOPAZ:
2152                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153                                 PIPE_CONFIG(ADDR_SURF_P2) |
2154                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2155                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157                                 PIPE_CONFIG(ADDR_SURF_P2) |
2158                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2159                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161                                 PIPE_CONFIG(ADDR_SURF_P2) |
2162                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2163                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165                                 PIPE_CONFIG(ADDR_SURF_P2) |
2166                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2167                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169                                 PIPE_CONFIG(ADDR_SURF_P2) |
2170                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2171                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2172                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2173                                 PIPE_CONFIG(ADDR_SURF_P2) |
2174                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2175                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2176                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2177                                 PIPE_CONFIG(ADDR_SURF_P2) |
2178                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2179                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2180                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2181                                 PIPE_CONFIG(ADDR_SURF_P2));
2182                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2183                                 PIPE_CONFIG(ADDR_SURF_P2) |
2184                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2187                                  PIPE_CONFIG(ADDR_SURF_P2) |
2188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2191                                  PIPE_CONFIG(ADDR_SURF_P2) |
2192                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2193                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2194                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2195                                  PIPE_CONFIG(ADDR_SURF_P2) |
2196                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2199                                  PIPE_CONFIG(ADDR_SURF_P2) |
2200                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2203                                  PIPE_CONFIG(ADDR_SURF_P2) |
2204                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2206                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2207                                  PIPE_CONFIG(ADDR_SURF_P2) |
2208                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2209                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2210                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2211                                  PIPE_CONFIG(ADDR_SURF_P2) |
2212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2215                                  PIPE_CONFIG(ADDR_SURF_P2) |
2216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2219                                  PIPE_CONFIG(ADDR_SURF_P2) |
2220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2223                                  PIPE_CONFIG(ADDR_SURF_P2) |
2224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2227                                  PIPE_CONFIG(ADDR_SURF_P2) |
2228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2230                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2231                                  PIPE_CONFIG(ADDR_SURF_P2) |
2232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2234                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2235                                  PIPE_CONFIG(ADDR_SURF_P2) |
2236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2238                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2239                                  PIPE_CONFIG(ADDR_SURF_P2) |
2240                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2241                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2242                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2243                                  PIPE_CONFIG(ADDR_SURF_P2) |
2244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2246                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2247                                  PIPE_CONFIG(ADDR_SURF_P2) |
2248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2250                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2251                                  PIPE_CONFIG(ADDR_SURF_P2) |
2252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2254
2255                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2256                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2257                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258                                 NUM_BANKS(ADDR_SURF_8_BANK));
2259                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2260                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262                                 NUM_BANKS(ADDR_SURF_8_BANK));
2263                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2264                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2266                                 NUM_BANKS(ADDR_SURF_8_BANK));
2267                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2269                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2270                                 NUM_BANKS(ADDR_SURF_8_BANK));
2271                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2273                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2274                                 NUM_BANKS(ADDR_SURF_8_BANK));
2275                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2277                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2278                                 NUM_BANKS(ADDR_SURF_8_BANK));
2279                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2282                                 NUM_BANKS(ADDR_SURF_8_BANK));
2283                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2284                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2285                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286                                 NUM_BANKS(ADDR_SURF_16_BANK));
2287                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2288                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2289                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290                                 NUM_BANKS(ADDR_SURF_16_BANK));
2291                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2292                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2293                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294                                  NUM_BANKS(ADDR_SURF_16_BANK));
2295                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2296                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2297                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2298                                  NUM_BANKS(ADDR_SURF_16_BANK));
2299                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2300                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2301                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2302                                  NUM_BANKS(ADDR_SURF_16_BANK));
2303                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2306                                  NUM_BANKS(ADDR_SURF_16_BANK));
2307                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2309                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2310                                  NUM_BANKS(ADDR_SURF_8_BANK));
2311
2312                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2313                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2314                             reg_offset != 23)
2315                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2316
2317                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2318                         if (reg_offset != 7)
2319                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2320
2321                 break;
2322         case CHIP_FIJI:
2323                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2338                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2342                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2344                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2348                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2350                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2351                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2352                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2353                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2354                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2355                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2356                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2357                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2358                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2366                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2369                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2370                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2373                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2382                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2385                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2386                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2389                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2390                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2391                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2392                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2393                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2394                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2396                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2398                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2402                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2406                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2410                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2414                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2415                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2416                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2418                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2420                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2422                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2424                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2425                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2426                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2428                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2429                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2440                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2442                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2443                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2444                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2445
2446                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449                                 NUM_BANKS(ADDR_SURF_8_BANK));
2450                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453                                 NUM_BANKS(ADDR_SURF_8_BANK));
2454                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2457                                 NUM_BANKS(ADDR_SURF_8_BANK));
2458                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2461                                 NUM_BANKS(ADDR_SURF_8_BANK));
2462                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2464                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465                                 NUM_BANKS(ADDR_SURF_8_BANK));
2466                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469                                 NUM_BANKS(ADDR_SURF_8_BANK));
2470                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2473                                 NUM_BANKS(ADDR_SURF_8_BANK));
2474                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2476                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477                                 NUM_BANKS(ADDR_SURF_8_BANK));
2478                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2480                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481                                 NUM_BANKS(ADDR_SURF_8_BANK));
2482                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2484                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485                                  NUM_BANKS(ADDR_SURF_8_BANK));
2486                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2489                                  NUM_BANKS(ADDR_SURF_8_BANK));
2490                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2492                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2493                                  NUM_BANKS(ADDR_SURF_8_BANK));
2494                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2497                                  NUM_BANKS(ADDR_SURF_8_BANK));
2498                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2501                                  NUM_BANKS(ADDR_SURF_4_BANK));
2502
2503                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2504                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2505
2506                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2507                         if (reg_offset != 7)
2508                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2509
2510                 break;
2511         case CHIP_TONGA:
2512                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2515                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2516                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2519                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2523                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2527                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2531                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2535                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2537                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2539                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2540                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2542                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2543                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2544                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2545                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2546                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2549                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2550                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2553                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2554                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2555                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2557                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2558                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2559                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2560                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2561                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2562                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2563                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2566                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2571                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2575                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2578                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2579                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2580                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2582                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2583                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2587                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2589                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2591                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2593                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2595                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2599                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2601                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2603                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2604                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2605                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2606                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2607                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2609                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2610                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2611                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2613                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2614                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2615                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2617                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2618                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2619                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2621                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2623                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2625                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2626                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2627                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2628                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2629                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2630                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2631                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2632                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2633                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2634
2635                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638                                 NUM_BANKS(ADDR_SURF_16_BANK));
2639                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642                                 NUM_BANKS(ADDR_SURF_16_BANK));
2643                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2646                                 NUM_BANKS(ADDR_SURF_16_BANK));
2647                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2649                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2650                                 NUM_BANKS(ADDR_SURF_16_BANK));
2651                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2653                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2654                                 NUM_BANKS(ADDR_SURF_16_BANK));
2655                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658                                 NUM_BANKS(ADDR_SURF_16_BANK));
2659                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2662                                 NUM_BANKS(ADDR_SURF_16_BANK));
2663                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2665                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2666                                 NUM_BANKS(ADDR_SURF_16_BANK));
2667                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2669                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2670                                 NUM_BANKS(ADDR_SURF_16_BANK));
2671                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2673                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2674                                  NUM_BANKS(ADDR_SURF_16_BANK));
2675                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2676                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2677                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2678                                  NUM_BANKS(ADDR_SURF_16_BANK));
2679                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2682                                  NUM_BANKS(ADDR_SURF_8_BANK));
2683                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2685                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2686                                  NUM_BANKS(ADDR_SURF_4_BANK));
2687                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2688                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2689                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2690                                  NUM_BANKS(ADDR_SURF_4_BANK));
2691
2692                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2693                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2694
2695                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2696                         if (reg_offset != 7)
2697                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2698
2699                 break;
2700         case CHIP_POLARIS11:
2701                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2704                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2705                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2708                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2712                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2716                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2718                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2720                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2721                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2722                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2724                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2725                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2726                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2728                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2729                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2730                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2732                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2733                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2734                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2735                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2738                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2739                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2747                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2751                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2755                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2759                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2763                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2767                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2771                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2784                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2788                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2792                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2794                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2795                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2796                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2798                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2799                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2800                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2802                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2803                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2804                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2806                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2807                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2808                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2810                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2814                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2816                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2818                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2819                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2822                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2823
2824                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2826                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2827                                 NUM_BANKS(ADDR_SURF_16_BANK));
2828
2829                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2831                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2832                                 NUM_BANKS(ADDR_SURF_16_BANK));
2833
2834                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2836                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2837                                 NUM_BANKS(ADDR_SURF_16_BANK));
2838
2839                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2841                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2842                                 NUM_BANKS(ADDR_SURF_16_BANK));
2843
2844                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847                                 NUM_BANKS(ADDR_SURF_16_BANK));
2848
2849                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2852                                 NUM_BANKS(ADDR_SURF_16_BANK));
2853
2854                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2855                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2856                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2857                                 NUM_BANKS(ADDR_SURF_16_BANK));
2858
2859                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2860                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2861                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2862                                 NUM_BANKS(ADDR_SURF_16_BANK));
2863
2864                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2865                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867                                 NUM_BANKS(ADDR_SURF_16_BANK));
2868
2869                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2871                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2872                                 NUM_BANKS(ADDR_SURF_16_BANK));
2873
2874                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2875                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2876                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2877                                 NUM_BANKS(ADDR_SURF_16_BANK));
2878
2879                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2881                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2882                                 NUM_BANKS(ADDR_SURF_16_BANK));
2883
2884                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2887                                 NUM_BANKS(ADDR_SURF_8_BANK));
2888
2889                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2890                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2891                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2892                                 NUM_BANKS(ADDR_SURF_4_BANK));
2893
2894                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2895                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2896
2897                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2898                         if (reg_offset != 7)
2899                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2900
2901                 break;
2902         case CHIP_POLARIS10:
2903                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2923                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2924                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2925                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2927                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2928                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2930                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2931                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2932                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2933                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2934                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2935                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2936                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2937                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2940                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2941                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2949                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2954                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2957                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2961                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2962                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2969                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2970                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2973                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2974                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2978                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2982                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2986                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2990                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2994                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2995                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2996                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2997                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2998                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3000                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3001                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3002                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3004                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3005                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3006                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3008                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3009                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3010                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3012                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3013                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3014                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3016                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3018                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3020                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3021                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3023                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3024                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3025
3026                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3028                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3029                                 NUM_BANKS(ADDR_SURF_16_BANK));
3030
3031                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3033                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3034                                 NUM_BANKS(ADDR_SURF_16_BANK));
3035
3036                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3038                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039                                 NUM_BANKS(ADDR_SURF_16_BANK));
3040
3041                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3043                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3044                                 NUM_BANKS(ADDR_SURF_16_BANK));
3045
3046                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3048                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3049                                 NUM_BANKS(ADDR_SURF_16_BANK));
3050
3051                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3054                                 NUM_BANKS(ADDR_SURF_16_BANK));
3055
3056                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3058                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3059                                 NUM_BANKS(ADDR_SURF_16_BANK));
3060
3061                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3062                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3063                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3064                                 NUM_BANKS(ADDR_SURF_16_BANK));
3065
3066                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3067                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3068                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3069                                 NUM_BANKS(ADDR_SURF_16_BANK));
3070
3071                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3072                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3073                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3074                                 NUM_BANKS(ADDR_SURF_16_BANK));
3075
3076                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3077                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3078                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3079                                 NUM_BANKS(ADDR_SURF_16_BANK));
3080
3081                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3083                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3084                                 NUM_BANKS(ADDR_SURF_8_BANK));
3085
3086                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3088                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3089                                 NUM_BANKS(ADDR_SURF_4_BANK));
3090
3091                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3092                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3093                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3094                                 NUM_BANKS(ADDR_SURF_4_BANK));
3095
3096                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3097                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3098
3099                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3100                         if (reg_offset != 7)
3101                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3102
3103                 break;
3104         case CHIP_STONEY:
3105                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106                                 PIPE_CONFIG(ADDR_SURF_P2) |
3107                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3108                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110                                 PIPE_CONFIG(ADDR_SURF_P2) |
3111                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3112                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3114                                 PIPE_CONFIG(ADDR_SURF_P2) |
3115                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3116                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3118                                 PIPE_CONFIG(ADDR_SURF_P2) |
3119                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3120                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3121                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122                                 PIPE_CONFIG(ADDR_SURF_P2) |
3123                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3124                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3125                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3126                                 PIPE_CONFIG(ADDR_SURF_P2) |
3127                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3128                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3129                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3130                                 PIPE_CONFIG(ADDR_SURF_P2) |
3131                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3132                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3133                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3134                                 PIPE_CONFIG(ADDR_SURF_P2));
3135                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3136                                 PIPE_CONFIG(ADDR_SURF_P2) |
3137                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3138                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3139                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3140                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3147                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3151                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3155                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3159                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3163                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3168                                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3172                                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3176                                  PIPE_CONFIG(ADDR_SURF_P2) |
3177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3180                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3183                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3184                                  PIPE_CONFIG(ADDR_SURF_P2) |
3185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3187                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3188                                  PIPE_CONFIG(ADDR_SURF_P2) |
3189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3191                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3192                                  PIPE_CONFIG(ADDR_SURF_P2) |
3193                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3194                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3195                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3196                                  PIPE_CONFIG(ADDR_SURF_P2) |
3197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3199                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200                                  PIPE_CONFIG(ADDR_SURF_P2) |
3201                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3202                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3204                                  PIPE_CONFIG(ADDR_SURF_P2) |
3205                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3206                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3207
3208                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3210                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211                                 NUM_BANKS(ADDR_SURF_8_BANK));
3212                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3214                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215                                 NUM_BANKS(ADDR_SURF_8_BANK));
3216                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3219                                 NUM_BANKS(ADDR_SURF_8_BANK));
3220                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3222                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3223                                 NUM_BANKS(ADDR_SURF_8_BANK));
3224                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3227                                 NUM_BANKS(ADDR_SURF_8_BANK));
3228                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3230                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3231                                 NUM_BANKS(ADDR_SURF_8_BANK));
3232                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3233                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3234                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3235                                 NUM_BANKS(ADDR_SURF_8_BANK));
3236                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3237                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3238                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3239                                 NUM_BANKS(ADDR_SURF_16_BANK));
3240                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3241                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3242                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243                                 NUM_BANKS(ADDR_SURF_16_BANK));
3244                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3245                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3246                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247                                  NUM_BANKS(ADDR_SURF_16_BANK));
3248                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3249                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3250                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3251                                  NUM_BANKS(ADDR_SURF_16_BANK));
3252                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3253                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3254                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3255                                  NUM_BANKS(ADDR_SURF_16_BANK));
3256                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3258                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259                                  NUM_BANKS(ADDR_SURF_16_BANK));
3260                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3261                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3262                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3263                                  NUM_BANKS(ADDR_SURF_8_BANK));
3264
3265                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3266                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3267                             reg_offset != 23)
3268                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3269
3270                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3271                         if (reg_offset != 7)
3272                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3273
3274                 break;
3275         default:
3276                 dev_warn(adev->dev,
3277                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3278                          adev->asic_type);
3279
3280         case CHIP_CARRIZO:
3281                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3282                                 PIPE_CONFIG(ADDR_SURF_P2) |
3283                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3284                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3285                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3286                                 PIPE_CONFIG(ADDR_SURF_P2) |
3287                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3288                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3290                                 PIPE_CONFIG(ADDR_SURF_P2) |
3291                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3292                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3294                                 PIPE_CONFIG(ADDR_SURF_P2) |
3295                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3296                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3297                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3298                                 PIPE_CONFIG(ADDR_SURF_P2) |
3299                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3300                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3301                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3302                                 PIPE_CONFIG(ADDR_SURF_P2) |
3303                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3304                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3305                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3306                                 PIPE_CONFIG(ADDR_SURF_P2) |
3307                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3308                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3309                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3310                                 PIPE_CONFIG(ADDR_SURF_P2));
3311                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3312                                 PIPE_CONFIG(ADDR_SURF_P2) |
3313                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3314                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3315                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3316                                  PIPE_CONFIG(ADDR_SURF_P2) |
3317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3323                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3327                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3328                                  PIPE_CONFIG(ADDR_SURF_P2) |
3329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3331                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3332                                  PIPE_CONFIG(ADDR_SURF_P2) |
3333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3335                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3336                                  PIPE_CONFIG(ADDR_SURF_P2) |
3337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3339                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3340                                  PIPE_CONFIG(ADDR_SURF_P2) |
3341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3343                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3344                                  PIPE_CONFIG(ADDR_SURF_P2) |
3345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3348                                  PIPE_CONFIG(ADDR_SURF_P2) |
3349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3352                                  PIPE_CONFIG(ADDR_SURF_P2) |
3353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3356                                  PIPE_CONFIG(ADDR_SURF_P2) |
3357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3359                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3360                                  PIPE_CONFIG(ADDR_SURF_P2) |
3361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3363                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3364                                  PIPE_CONFIG(ADDR_SURF_P2) |
3365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3367                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3368                                  PIPE_CONFIG(ADDR_SURF_P2) |
3369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3371                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3372                                  PIPE_CONFIG(ADDR_SURF_P2) |
3373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3375                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3376                                  PIPE_CONFIG(ADDR_SURF_P2) |
3377                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3378                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3379                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3380                                  PIPE_CONFIG(ADDR_SURF_P2) |
3381                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3382                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3383
3384                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3386                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3387                                 NUM_BANKS(ADDR_SURF_8_BANK));
3388                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3390                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3391                                 NUM_BANKS(ADDR_SURF_8_BANK));
3392                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3394                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3395                                 NUM_BANKS(ADDR_SURF_8_BANK));
3396                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3397                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3398                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3399                                 NUM_BANKS(ADDR_SURF_8_BANK));
3400                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3401                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3402                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3403                                 NUM_BANKS(ADDR_SURF_8_BANK));
3404                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3405                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3406                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3407                                 NUM_BANKS(ADDR_SURF_8_BANK));
3408                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3409                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3410                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3411                                 NUM_BANKS(ADDR_SURF_8_BANK));
3412                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3413                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3414                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3415                                 NUM_BANKS(ADDR_SURF_16_BANK));
3416                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3417                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3418                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419                                 NUM_BANKS(ADDR_SURF_16_BANK));
3420                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3421                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3422                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3423                                  NUM_BANKS(ADDR_SURF_16_BANK));
3424                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3425                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3426                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3427                                  NUM_BANKS(ADDR_SURF_16_BANK));
3428                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3429                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3430                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3431                                  NUM_BANKS(ADDR_SURF_16_BANK));
3432                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3433                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3434                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3435                                  NUM_BANKS(ADDR_SURF_16_BANK));
3436                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3437                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3438                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3439                                  NUM_BANKS(ADDR_SURF_8_BANK));
3440
3441                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3442                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3443                             reg_offset != 23)
3444                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3445
3446                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3447                         if (reg_offset != 7)
3448                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3449
3450                 break;
3451         }
3452 }
3453
3454 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3455                                   u32 se_num, u32 sh_num, u32 instance)
3456 {
3457         u32 data;
3458
3459         if (instance == 0xffffffff)
3460                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3461         else
3462                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3463
3464         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3465                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3466                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3467         } else if (se_num == 0xffffffff) {
3468                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3469                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3470         } else if (sh_num == 0xffffffff) {
3471                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3472                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3473         } else {
3474                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3475                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3476         }
3477         WREG32(mmGRBM_GFX_INDEX, data);
3478 }
3479
3480 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3481 {
3482         return (u32)((1ULL << bit_width) - 1);
3483 }
3484
3485 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3486 {
3487         u32 data, mask;
3488
3489         data = RREG32(mmCC_RB_BACKEND_DISABLE);
3490         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3491
3492         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3493         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3494
3495         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3496                                        adev->gfx.config.max_sh_per_se);
3497
3498         return (~data) & mask;
3499 }
3500
3501 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3502 {
3503         int i, j;
3504         u32 data;
3505         u32 active_rbs = 0;
3506         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3507                                         adev->gfx.config.max_sh_per_se;
3508
3509         mutex_lock(&adev->grbm_idx_mutex);
3510         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3511                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3512                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3513                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3514                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3515                                                rb_bitmap_width_per_sh);
3516                 }
3517         }
3518         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3519         mutex_unlock(&adev->grbm_idx_mutex);
3520
3521         adev->gfx.config.backend_enable_mask = active_rbs;
3522         adev->gfx.config.num_rbs = hweight32(active_rbs);
3523 }
3524
3525 /**
3526  * gfx_v8_0_init_compute_vmid - gart enable
3527  *
3528  * @rdev: amdgpu_device pointer
3529  *
3530  * Initialize compute vmid sh_mem registers
3531  *
3532  */
3533 #define DEFAULT_SH_MEM_BASES    (0x6000)
3534 #define FIRST_COMPUTE_VMID      (8)
3535 #define LAST_COMPUTE_VMID       (16)
3536 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3537 {
3538         int i;
3539         uint32_t sh_mem_config;
3540         uint32_t sh_mem_bases;
3541
3542         /*
3543          * Configure apertures:
3544          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3545          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3546          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3547          */
3548         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3549
3550         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3551                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3552                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3553                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3554                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3555                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3556
3557         mutex_lock(&adev->srbm_mutex);
3558         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3559                 vi_srbm_select(adev, 0, 0, 0, i);
3560                 /* CP and shaders */
3561                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3562                 WREG32(mmSH_MEM_APE1_BASE, 1);
3563                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3564                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3565         }
3566         vi_srbm_select(adev, 0, 0, 0, 0);
3567         mutex_unlock(&adev->srbm_mutex);
3568 }
3569
3570 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3571 {
3572         u32 tmp;
3573         int i;
3574
3575         tmp = RREG32(mmGRBM_CNTL);
3576         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3577         WREG32(mmGRBM_CNTL, tmp);
3578
3579         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3580         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3581         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3582
3583         gfx_v8_0_tiling_mode_table_init(adev);
3584
3585         gfx_v8_0_setup_rb(adev);
3586         gfx_v8_0_get_cu_info(adev);
3587
3588         /* XXX SH_MEM regs */
3589         /* where to put LDS, scratch, GPUVM in FSA64 space */
3590         mutex_lock(&adev->srbm_mutex);
3591         for (i = 0; i < 16; i++) {
3592                 vi_srbm_select(adev, 0, 0, 0, i);
3593                 /* CP and shaders */
3594                 if (i == 0) {
3595                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3596                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3597                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3598                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3599                         WREG32(mmSH_MEM_CONFIG, tmp);
3600                 } else {
3601                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3602                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3603                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3604                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3605                         WREG32(mmSH_MEM_CONFIG, tmp);
3606                 }
3607
3608                 WREG32(mmSH_MEM_APE1_BASE, 1);
3609                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3610                 WREG32(mmSH_MEM_BASES, 0);
3611         }
3612         vi_srbm_select(adev, 0, 0, 0, 0);
3613         mutex_unlock(&adev->srbm_mutex);
3614
3615         gfx_v8_0_init_compute_vmid(adev);
3616
3617         mutex_lock(&adev->grbm_idx_mutex);
3618         /*
3619          * making sure that the following register writes will be broadcasted
3620          * to all the shaders
3621          */
3622         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3623
3624         WREG32(mmPA_SC_FIFO_SIZE,
3625                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3626                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3627                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3628                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3629                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3630                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3631                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3632                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3633         mutex_unlock(&adev->grbm_idx_mutex);
3634
3635 }
3636
3637 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3638 {
3639         u32 i, j, k;
3640         u32 mask;
3641
3642         mutex_lock(&adev->grbm_idx_mutex);
3643         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3644                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3645                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3646                         for (k = 0; k < adev->usec_timeout; k++) {
3647                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3648                                         break;
3649                                 udelay(1);
3650                         }
3651                 }
3652         }
3653         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3654         mutex_unlock(&adev->grbm_idx_mutex);
3655
3656         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3657                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3658                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3659                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3660         for (k = 0; k < adev->usec_timeout; k++) {
3661                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3662                         break;
3663                 udelay(1);
3664         }
3665 }
3666
3667 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3668                                                bool enable)
3669 {
3670         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3671
3672         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3673         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3674         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3675         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3676
3677         WREG32(mmCP_INT_CNTL_RING0, tmp);
3678 }
3679
3680 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3681 {
3682         /* csib */
3683         WREG32(mmRLC_CSIB_ADDR_HI,
3684                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3685         WREG32(mmRLC_CSIB_ADDR_LO,
3686                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3687         WREG32(mmRLC_CSIB_LENGTH,
3688                         adev->gfx.rlc.clear_state_size);
3689 }
3690
/*
 * Walk the indirect part of an RLC register list and rewrite it in place.
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset:           index of the first dword to parse
 * @list_size:            total number of dwords in @register_list_format
 * @unique_indices:       table that collects each distinct index value seen
 * @indices_count:        in/out number of used slots in @unique_indices
 * @max_indices:          capacity of @unique_indices
 * @ind_start_offsets:    receives the offset at which each sub-list starts
 * @offset_count:         in/out number of used slots in @ind_start_offsets
 * @max_offset:           capacity of @ind_start_offsets
 *
 * The list is a sequence of sub-lists separated by 0xFFFFFFFF markers.  The
 * start of every sub-list is recorded in @ind_start_offsets.  Inside a
 * sub-list each entry is three dwords wide; the third dword is looked up in
 * (or appended to) @unique_indices and then replaced by its position in
 * that table.
 *
 * BUG()s if either table would become full.  A trailing entry that does not
 * have all three dwords inside @list_size ends the parse instead of being
 * accessed out of bounds.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* sub-list terminator: the next dword starts a new sub-list */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/*
		 * The index dword lives two slots further on.  Stop if the
		 * list ends before it, otherwise the lookup and the
		 * write-back below would touch memory past the buffer.
		 */
		if (list_size - ind_offset <= 2)
			break;

		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append it to the table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value by its slot number in the table */
		register_list_format[ind_offset] = indices;
	}
}
3740
3741 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3742 {
3743         int i, temp, data;
3744         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3745         int indices_count = 0;
3746         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3747         int offset_count = 0;
3748
3749         int list_size;
3750         unsigned int *register_list_format =
3751                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3752         if (register_list_format == NULL)
3753                 return -ENOMEM;
3754         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3755                         adev->gfx.rlc.reg_list_format_size_bytes);
3756
3757         gfx_v8_0_parse_ind_reg_list(register_list_format,
3758                                 RLC_FormatDirectRegListLength,
3759                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3760                                 unique_indices,
3761                                 &indices_count,
3762                                 sizeof(unique_indices) / sizeof(int),
3763                                 indirect_start_offsets,
3764                                 &offset_count,
3765                                 sizeof(indirect_start_offsets)/sizeof(int));
3766
3767         /* save and restore list */
3768         temp = RREG32(mmRLC_SRM_CNTL);
3769         temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3770         WREG32(mmRLC_SRM_CNTL, temp);
3771
3772         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3773         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3774                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3775
3776         /* indirect list */
3777         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3778         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3779                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3780
3781         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3782         list_size = list_size >> 1;
3783         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3784         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3785
3786         /* starting offsets starts */
3787         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3788                 adev->gfx.rlc.starting_offsets_start);
3789         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3790                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3791                                 indirect_start_offsets[i]);
3792
3793         /* unique indices */
3794         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3795         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3796         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3797                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3798                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3799         }
3800         kfree(register_list_format);
3801
3802         return 0;
3803 }
3804
3805 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3806 {
3807         uint32_t data;
3808
3809         data = RREG32(mmRLC_SRM_CNTL);
3810         data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3811         WREG32(mmRLC_SRM_CNTL, data);
3812 }
3813
3814 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3815 {
3816         uint32_t data;
3817
3818         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3819                               AMD_PG_SUPPORT_GFX_SMG |
3820                               AMD_PG_SUPPORT_GFX_DMG)) {
3821                 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3822                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3823                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3824                 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3825
3826                 data = 0;
3827                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3828                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3829                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3830                 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3831                 WREG32(mmRLC_PG_DELAY, data);
3832
3833                 data = RREG32(mmRLC_PG_DELAY_2);
3834                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3835                 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3836                 WREG32(mmRLC_PG_DELAY_2, data);
3837
3838                 data = RREG32(mmRLC_AUTO_PG_CTRL);
3839                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3840                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3841                 WREG32(mmRLC_AUTO_PG_CTRL, data);
3842         }
3843 }
3844
3845 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3846                                                 bool enable)
3847 {
3848         u32 data, orig;
3849
3850         orig = data = RREG32(mmRLC_PG_CNTL);
3851
3852         if (enable)
3853                 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3854         else
3855                 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3856
3857         if (orig != data)
3858                 WREG32(mmRLC_PG_CNTL, data);
3859 }
3860
3861 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3862                                                   bool enable)
3863 {
3864         u32 data, orig;
3865
3866         orig = data = RREG32(mmRLC_PG_CNTL);
3867
3868         if (enable)
3869                 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3870         else
3871                 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3872
3873         if (orig != data)
3874                 WREG32(mmRLC_PG_CNTL, data);
3875 }
3876
3877 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3878 {
3879         u32 data, orig;
3880
3881         orig = data = RREG32(mmRLC_PG_CNTL);
3882
3883         if (enable)
3884                 data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3885         else
3886                 data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3887
3888         if (orig != data)
3889                 WREG32(mmRLC_PG_CNTL, data);
3890 }
3891
3892 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3893 {
3894         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3895                               AMD_PG_SUPPORT_GFX_SMG |
3896                               AMD_PG_SUPPORT_GFX_DMG |
3897                               AMD_PG_SUPPORT_CP |
3898                               AMD_PG_SUPPORT_GDS |
3899                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3900                 gfx_v8_0_init_csb(adev);
3901                 gfx_v8_0_init_save_restore_list(adev);
3902                 gfx_v8_0_enable_save_restore_machine(adev);
3903
3904                 if ((adev->asic_type == CHIP_CARRIZO) ||
3905                     (adev->asic_type == CHIP_STONEY)) {
3906                         WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3907                         gfx_v8_0_init_power_gating(adev);
3908                         WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3909                         if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3910                                 cz_enable_sck_slow_down_on_power_up(adev, true);
3911                                 cz_enable_sck_slow_down_on_power_down(adev, true);
3912                         } else {
3913                                 cz_enable_sck_slow_down_on_power_up(adev, false);
3914                                 cz_enable_sck_slow_down_on_power_down(adev, false);
3915                         }
3916                         if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3917                                 cz_enable_cp_power_gating(adev, true);
3918                         else
3919                                 cz_enable_cp_power_gating(adev, false);
3920                 } else if (adev->asic_type == CHIP_POLARIS11) {
3921                         gfx_v8_0_init_power_gating(adev);
3922                 }
3923         }
3924 }
3925
3926 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3927 {
3928         u32 tmp = RREG32(mmRLC_CNTL);
3929
3930         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3931         WREG32(mmRLC_CNTL, tmp);
3932
3933         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3934
3935         gfx_v8_0_wait_for_rlc_serdes(adev);
3936 }
3937
3938 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3939 {
3940         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3941
3942         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3943         WREG32(mmGRBM_SOFT_RESET, tmp);
3944         udelay(50);
3945         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3946         WREG32(mmGRBM_SOFT_RESET, tmp);
3947         udelay(50);
3948 }
3949
3950 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3951 {
3952         u32 tmp = RREG32(mmRLC_CNTL);
3953
3954         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3955         WREG32(mmRLC_CNTL, tmp);
3956
3957         /* carrizo do enable cp interrupt after cp inited */
3958         if (!(adev->flags & AMD_IS_APU))
3959                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3960
3961         udelay(50);
3962 }
3963
3964 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3965 {
3966         const struct rlc_firmware_header_v2_0 *hdr;
3967         const __le32 *fw_data;
3968         unsigned i, fw_size;
3969
3970         if (!adev->gfx.rlc_fw)
3971                 return -EINVAL;
3972
3973         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3974         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3975
3976         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3977                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3978         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3979
3980         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3981         for (i = 0; i < fw_size; i++)
3982                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3983         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3984
3985         return 0;
3986 }
3987
3988 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3989 {
3990         int r;
3991
3992         gfx_v8_0_rlc_stop(adev);
3993
3994         /* disable CG */
3995         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3996         if (adev->asic_type == CHIP_POLARIS11 ||
3997                 adev->asic_type == CHIP_POLARIS10)
3998                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3999
4000         /* disable PG */
4001         WREG32(mmRLC_PG_CNTL, 0);
4002
4003         gfx_v8_0_rlc_reset(adev);
4004
4005         gfx_v8_0_init_pg(adev);
4006
4007         if (!adev->pp_enabled) {
4008                 if (!adev->firmware.smu_load) {
4009                         /* legacy rlc firmware loading */
4010                         r = gfx_v8_0_rlc_load_microcode(adev);
4011                         if (r)
4012                                 return r;
4013                 } else {
4014                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4015                                                         AMDGPU_UCODE_ID_RLC_G);
4016                         if (r)
4017                                 return -EINVAL;
4018                 }
4019         }
4020
4021         gfx_v8_0_rlc_start(adev);
4022
4023         return 0;
4024 }
4025
4026 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4027 {
4028         int i;
4029         u32 tmp = RREG32(mmCP_ME_CNTL);
4030
4031         if (enable) {
4032                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4033                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4034                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4035         } else {
4036                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4037                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4038                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4039                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4040                         adev->gfx.gfx_ring[i].ready = false;
4041         }
4042         WREG32(mmCP_ME_CNTL, tmp);
4043         udelay(50);
4044 }
4045
4046 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4047 {
4048         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4049         const struct gfx_firmware_header_v1_0 *ce_hdr;
4050         const struct gfx_firmware_header_v1_0 *me_hdr;
4051         const __le32 *fw_data;
4052         unsigned i, fw_size;
4053
4054         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4055                 return -EINVAL;
4056
4057         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4058                 adev->gfx.pfp_fw->data;
4059         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4060                 adev->gfx.ce_fw->data;
4061         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4062                 adev->gfx.me_fw->data;
4063
4064         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4065         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4066         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4067
4068         gfx_v8_0_cp_gfx_enable(adev, false);
4069
4070         /* PFP */
4071         fw_data = (const __le32 *)
4072                 (adev->gfx.pfp_fw->data +
4073                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4074         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4075         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4076         for (i = 0; i < fw_size; i++)
4077                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4078         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4079
4080         /* CE */
4081         fw_data = (const __le32 *)
4082                 (adev->gfx.ce_fw->data +
4083                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4084         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4085         WREG32(mmCP_CE_UCODE_ADDR, 0);
4086         for (i = 0; i < fw_size; i++)
4087                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4088         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4089
4090         /* ME */
4091         fw_data = (const __le32 *)
4092                 (adev->gfx.me_fw->data +
4093                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4094         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4095         WREG32(mmCP_ME_RAM_WADDR, 0);
4096         for (i = 0; i < fw_size; i++)
4097                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4098         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4099
4100         return 0;
4101 }
4102
4103 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4104 {
4105         u32 count = 0;
4106         const struct cs_section_def *sect = NULL;
4107         const struct cs_extent_def *ext = NULL;
4108
4109         /* begin clear state */
4110         count += 2;
4111         /* context control state */
4112         count += 3;
4113
4114         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4115                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4116                         if (sect->id == SECT_CONTEXT)
4117                                 count += 2 + ext->reg_count;
4118                         else
4119                                 return 0;
4120                 }
4121         }
4122         /* pa_sc_raster_config/pa_sc_raster_config1 */
4123         count += 4;
4124         /* end clear state */
4125         count += 2;
4126         /* clear state */
4127         count += 2;
4128
4129         return count;
4130 }
4131
4132 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4133 {
4134         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4135         const struct cs_section_def *sect = NULL;
4136         const struct cs_extent_def *ext = NULL;
4137         int r, i;
4138
4139         /* init the CP */
4140         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4141         WREG32(mmCP_ENDIAN_SWAP, 0);
4142         WREG32(mmCP_DEVICE_ID, 1);
4143
4144         gfx_v8_0_cp_gfx_enable(adev, true);
4145
4146         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4147         if (r) {
4148                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4149                 return r;
4150         }
4151
4152         /* clear state buffer */
4153         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4154         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4155
4156         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4157         amdgpu_ring_write(ring, 0x80000000);
4158         amdgpu_ring_write(ring, 0x80000000);
4159
4160         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4161                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4162                         if (sect->id == SECT_CONTEXT) {
4163                                 amdgpu_ring_write(ring,
4164                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4165                                                ext->reg_count));
4166                                 amdgpu_ring_write(ring,
4167                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4168                                 for (i = 0; i < ext->reg_count; i++)
4169                                         amdgpu_ring_write(ring, ext->extent[i]);
4170                         }
4171                 }
4172         }
4173
4174         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4175         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4176         switch (adev->asic_type) {
4177         case CHIP_TONGA:
4178         case CHIP_POLARIS10:
4179                 amdgpu_ring_write(ring, 0x16000012);
4180                 amdgpu_ring_write(ring, 0x0000002A);
4181                 break;
4182         case CHIP_POLARIS11:
4183                 amdgpu_ring_write(ring, 0x16000012);
4184                 amdgpu_ring_write(ring, 0x00000000);
4185                 break;
4186         case CHIP_FIJI:
4187                 amdgpu_ring_write(ring, 0x3a00161a);
4188                 amdgpu_ring_write(ring, 0x0000002e);
4189                 break;
4190         case CHIP_CARRIZO:
4191                 amdgpu_ring_write(ring, 0x00000002);
4192                 amdgpu_ring_write(ring, 0x00000000);
4193                 break;
4194         case CHIP_TOPAZ:
4195                 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4196                                 0x00000000 : 0x00000002);
4197                 amdgpu_ring_write(ring, 0x00000000);
4198                 break;
4199         case CHIP_STONEY:
4200                 amdgpu_ring_write(ring, 0x00000000);
4201                 amdgpu_ring_write(ring, 0x00000000);
4202                 break;
4203         default:
4204                 BUG();
4205         }
4206
4207         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4208         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4209
4210         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4211         amdgpu_ring_write(ring, 0);
4212
4213         /* init the CE partitions */
4214         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4215         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4216         amdgpu_ring_write(ring, 0x8000);
4217         amdgpu_ring_write(ring, 0x8000);
4218
4219         amdgpu_ring_commit(ring);
4220
4221         return 0;
4222 }
4223
4224 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4225 {
4226         struct amdgpu_ring *ring;
4227         u32 tmp;
4228         u32 rb_bufsz;
4229         u64 rb_addr, rptr_addr;
4230         int r;
4231
4232         /* Set the write pointer delay */
4233         WREG32(mmCP_RB_WPTR_DELAY, 0);
4234
4235         /* set the RB to use vmid 0 */
4236         WREG32(mmCP_RB_VMID, 0);
4237
4238         /* Set ring buffer size */
4239         ring = &adev->gfx.gfx_ring[0];
4240         rb_bufsz = order_base_2(ring->ring_size / 8);
4241         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4242         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4243         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4244         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4245 #ifdef __BIG_ENDIAN
4246         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4247 #endif
4248         WREG32(mmCP_RB0_CNTL, tmp);
4249
4250         /* Initialize the ring buffer's read and write pointers */
4251         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4252         ring->wptr = 0;
4253         WREG32(mmCP_RB0_WPTR, ring->wptr);
4254
4255         /* set the wb address wether it's enabled or not */
4256         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4257         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4258         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4259
4260         mdelay(1);
4261         WREG32(mmCP_RB0_CNTL, tmp);
4262
4263         rb_addr = ring->gpu_addr >> 8;
4264         WREG32(mmCP_RB0_BASE, rb_addr);
4265         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4266
4267         /* no gfx doorbells on iceland */
4268         if (adev->asic_type != CHIP_TOPAZ) {
4269                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4270                 if (ring->use_doorbell) {
4271                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4272                                             DOORBELL_OFFSET, ring->doorbell_index);
4273                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4274                                             DOORBELL_HIT, 0);
4275                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4276                                             DOORBELL_EN, 1);
4277                 } else {
4278                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4279                                             DOORBELL_EN, 0);
4280                 }
4281                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4282
4283                 if (adev->asic_type == CHIP_TONGA) {
4284                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4285                                             DOORBELL_RANGE_LOWER,
4286                                             AMDGPU_DOORBELL_GFX_RING0);
4287                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4288
4289                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4290                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4291                 }
4292
4293         }
4294
4295         /* start the ring */
4296         gfx_v8_0_cp_gfx_start(adev);
4297         ring->ready = true;
4298         r = amdgpu_ring_test_ring(ring);
4299         if (r) {
4300                 ring->ready = false;
4301                 return r;
4302         }
4303
4304         return 0;
4305 }
4306
4307 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4308 {
4309         int i;
4310
4311         if (enable) {
4312                 WREG32(mmCP_MEC_CNTL, 0);
4313         } else {
4314                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4315                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4316                         adev->gfx.compute_ring[i].ready = false;
4317         }
4318         udelay(50);
4319 }
4320
4321 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4322 {
4323         const struct gfx_firmware_header_v1_0 *mec_hdr;
4324         const __le32 *fw_data;
4325         unsigned i, fw_size;
4326
4327         if (!adev->gfx.mec_fw)
4328                 return -EINVAL;
4329
4330         gfx_v8_0_cp_compute_enable(adev, false);
4331
4332         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4333         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4334
4335         fw_data = (const __le32 *)
4336                 (adev->gfx.mec_fw->data +
4337                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4338         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4339
4340         /* MEC1 */
4341         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4342         for (i = 0; i < fw_size; i++)
4343                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4344         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4345
4346         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4347         if (adev->gfx.mec2_fw) {
4348                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4349
4350                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4351                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4352
4353                 fw_data = (const __le32 *)
4354                         (adev->gfx.mec2_fw->data +
4355                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4356                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4357
4358                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4359                 for (i = 0; i < fw_size; i++)
4360                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4361                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4362         }
4363
4364         return 0;
4365 }
4366
/*
 * struct vi_mqd - layout of the per-compute-ring "MQD" buffer.
 *
 * gfx_v8_0_cp_compute_resume() creates one buffer object of
 * sizeof(struct vi_mqd) per compute ring, zeroes it and then fills in
 * individual members through a pointer of this type.
 *
 * Every member is a single 32-bit dword; the trailing "ordinalN" comment
 * is the member's dword index.  Ordinals 0..255 are named, followed by a
 * further 256 reserved dwords, i.e. 512 dwords (2048 bytes) in total.
 *
 * NOTE(review): MQD presumably stands for "memory queue descriptor" and
 * the layout is presumably dictated by the CP microcode - confirm against
 * the hardware documentation before reordering or resizing anything here.
 */
4367 struct vi_mqd {
4368         uint32_t header;  /* ordinal0 */
             /* compute_* dwords (ordinal1..80) */
4369         uint32_t compute_dispatch_initiator;  /* ordinal1 */
4370         uint32_t compute_dim_x;  /* ordinal2 */
4371         uint32_t compute_dim_y;  /* ordinal3 */
4372         uint32_t compute_dim_z;  /* ordinal4 */
4373         uint32_t compute_start_x;  /* ordinal5 */
4374         uint32_t compute_start_y;  /* ordinal6 */
4375         uint32_t compute_start_z;  /* ordinal7 */
4376         uint32_t compute_num_thread_x;  /* ordinal8 */
4377         uint32_t compute_num_thread_y;  /* ordinal9 */
4378         uint32_t compute_num_thread_z;  /* ordinal10 */
4379         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4380         uint32_t compute_perfcount_enable;  /* ordinal12 */
4381         uint32_t compute_pgm_lo;  /* ordinal13 */
4382         uint32_t compute_pgm_hi;  /* ordinal14 */
4383         uint32_t compute_tba_lo;  /* ordinal15 */
4384         uint32_t compute_tba_hi;  /* ordinal16 */
4385         uint32_t compute_tma_lo;  /* ordinal17 */
4386         uint32_t compute_tma_hi;  /* ordinal18 */
4387         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4388         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4389         uint32_t compute_vmid;  /* ordinal21 */
4390         uint32_t compute_resource_limits;  /* ordinal22 */
4391         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4392         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4393         uint32_t compute_tmpring_size;  /* ordinal25 */
4394         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4395         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4396         uint32_t compute_restart_x;  /* ordinal28 */
4397         uint32_t compute_restart_y;  /* ordinal29 */
4398         uint32_t compute_restart_z;  /* ordinal30 */
4399         uint32_t compute_thread_trace_enable;  /* ordinal31 */
4400         uint32_t compute_misc_reserved;  /* ordinal32 */
4401         uint32_t compute_dispatch_id;  /* ordinal33 */
4402         uint32_t compute_threadgroup_id;  /* ordinal34 */
4403         uint32_t compute_relaunch;  /* ordinal35 */
4404         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4405         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4406         uint32_t compute_wave_restore_control;  /* ordinal38 */
4407         uint32_t reserved9;  /* ordinal39 */
4408         uint32_t reserved10;  /* ordinal40 */
4409         uint32_t reserved11;  /* ordinal41 */
4410         uint32_t reserved12;  /* ordinal42 */
4411         uint32_t reserved13;  /* ordinal43 */
4412         uint32_t reserved14;  /* ordinal44 */
4413         uint32_t reserved15;  /* ordinal45 */
4414         uint32_t reserved16;  /* ordinal46 */
4415         uint32_t reserved17;  /* ordinal47 */
4416         uint32_t reserved18;  /* ordinal48 */
4417         uint32_t reserved19;  /* ordinal49 */
4418         uint32_t reserved20;  /* ordinal50 */
4419         uint32_t reserved21;  /* ordinal51 */
4420         uint32_t reserved22;  /* ordinal52 */
4421         uint32_t reserved23;  /* ordinal53 */
4422         uint32_t reserved24;  /* ordinal54 */
4423         uint32_t reserved25;  /* ordinal55 */
4424         uint32_t reserved26;  /* ordinal56 */
4425         uint32_t reserved27;  /* ordinal57 */
4426         uint32_t reserved28;  /* ordinal58 */
4427         uint32_t reserved29;  /* ordinal59 */
4428         uint32_t reserved30;  /* ordinal60 */
4429         uint32_t reserved31;  /* ordinal61 */
4430         uint32_t reserved32;  /* ordinal62 */
4431         uint32_t reserved33;  /* ordinal63 */
4432         uint32_t reserved34;  /* ordinal64 */
4433         uint32_t compute_user_data_0;  /* ordinal65 */
4434         uint32_t compute_user_data_1;  /* ordinal66 */
4435         uint32_t compute_user_data_2;  /* ordinal67 */
4436         uint32_t compute_user_data_3;  /* ordinal68 */
4437         uint32_t compute_user_data_4;  /* ordinal69 */
4438         uint32_t compute_user_data_5;  /* ordinal70 */
4439         uint32_t compute_user_data_6;  /* ordinal71 */
4440         uint32_t compute_user_data_7;  /* ordinal72 */
4441         uint32_t compute_user_data_8;  /* ordinal73 */
4442         uint32_t compute_user_data_9;  /* ordinal74 */
4443         uint32_t compute_user_data_10;  /* ordinal75 */
4444         uint32_t compute_user_data_11;  /* ordinal76 */
4445         uint32_t compute_user_data_12;  /* ordinal77 */
4446         uint32_t compute_user_data_13;  /* ordinal78 */
4447         uint32_t compute_user_data_14;  /* ordinal79 */
4448         uint32_t compute_user_data_15;  /* ordinal80 */
             /* cp_compute_*, cp_mqd_*, gds_* and cp_p* dwords (ordinal81..127) */
4449         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4450         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4451         uint32_t reserved35;  /* ordinal83 */
4452         uint32_t reserved36;  /* ordinal84 */
4453         uint32_t reserved37;  /* ordinal85 */
4454         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4455         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4456         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4457         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4458         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4459         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4460         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4461         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4462         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4463         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4464         uint32_t reserved38;  /* ordinal96 */
4465         uint32_t reserved39;  /* ordinal97 */
4466         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4467         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4468         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4469         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4470         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4471         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4472         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4473         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4474         uint32_t reserved40;  /* ordinal106 */
4475         uint32_t reserved41;  /* ordinal107 */
4476         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4477         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4478         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4479         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4480         uint32_t reserved42;  /* ordinal112 */
4481         uint32_t reserved43;  /* ordinal113 */
4482         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4483         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4484         uint32_t cp_packet_id_lo;  /* ordinal116 */
4485         uint32_t cp_packet_id_hi;  /* ordinal117 */
4486         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4487         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4488         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4489         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4490         uint32_t gds_save_mask_lo;  /* ordinal122 */
4491         uint32_t gds_save_mask_hi;  /* ordinal123 */
4492         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4493         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4494         uint32_t reserved44;  /* ordinal126 */
4495         uint32_t reserved45;  /* ordinal127 */
             /*
              * cp_mqd_* / cp_hqd_* dwords (ordinal128..181).  In the part of
              * gfx_v8_0_cp_compute_resume() that follows this struct,
              * cp_hqd_eop_base_addr_lo/hi are filled from reads of
              * mmCP_HQD_EOP_BASE_ADDR(_HI) and cp_hqd_pq_doorbell_control
              * from the value written to mmCP_HQD_PQ_DOORBELL_CONTROL.
              * NOTE(review): the other cp_hqd_* members presumably shadow the
              * like-named CP_HQD_* registers as well - confirm.
              */
4496         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4497         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4498         uint32_t cp_hqd_active;  /* ordinal130 */
4499         uint32_t cp_hqd_vmid;  /* ordinal131 */
4500         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4501         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4502         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4503         uint32_t cp_hqd_quantum;  /* ordinal135 */
4504         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4505         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4506         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4507         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4508         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4509         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4510         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4511         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4512         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4513         uint32_t cp_hqd_pq_control;  /* ordinal145 */
4514         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4515         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4516         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4517         uint32_t cp_hqd_ib_control;  /* ordinal149 */
4518         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4519         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4520         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4521         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4522         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4523         uint32_t cp_hqd_msg_type;  /* ordinal155 */
4524         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4525         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4526         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4527         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4528         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4529         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4530         uint32_t cp_mqd_control;  /* ordinal162 */
4531         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4532         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4533         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4534         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4535         uint32_t cp_hqd_eop_control;  /* ordinal167 */
4536         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4537         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4538         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4539         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4540         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4541         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4542         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4543         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4544         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4545         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4546         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4547         uint32_t cp_hqd_error;  /* ordinal179 */
4548         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4549         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4550         uint32_t reserved46;  /* ordinal182 */
4551         uint32_t reserved47;  /* ordinal183 */
4552         uint32_t reserved48;  /* ordinal184 */
4553         uint32_t reserved49;  /* ordinal185 */
4554         uint32_t reserved50;  /* ordinal186 */
4555         uint32_t reserved51;  /* ordinal187 */
4556         uint32_t reserved52;  /* ordinal188 */
4557         uint32_t reserved53;  /* ordinal189 */
4558         uint32_t reserved54;  /* ordinal190 */
4559         uint32_t reserved55;  /* ordinal191 */
             /* iqtimer_pkt_* dwords: a header plus 32 payload dwords (ordinal192..224) */
4560         uint32_t iqtimer_pkt_header;  /* ordinal192 */
4561         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4562         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4563         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4564         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4565         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4566         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4567         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4568         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4569         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4570         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4571         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4572         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4573         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4574         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4575         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4576         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4577         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4578         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4579         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4580         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4581         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4582         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4583         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4584         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4585         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4586         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4587         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4588         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4589         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4590         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4591         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4592         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4593         uint32_t reserved56;  /* ordinal225 */
4594         uint32_t reserved57;  /* ordinal226 */
4595         uint32_t reserved58;  /* ordinal227 */
             /* set_resources_* dwords: a header plus dw1..dw7 (ordinal228..235) */
4596         uint32_t set_resources_header;  /* ordinal228 */
4597         uint32_t set_resources_dw1;  /* ordinal229 */
4598         uint32_t set_resources_dw2;  /* ordinal230 */
4599         uint32_t set_resources_dw3;  /* ordinal231 */
4600         uint32_t set_resources_dw4;  /* ordinal232 */
4601         uint32_t set_resources_dw5;  /* ordinal233 */
4602         uint32_t set_resources_dw6;  /* ordinal234 */
4603         uint32_t set_resources_dw7;  /* ordinal235 */
4604         uint32_t reserved59;  /* ordinal236 */
4605         uint32_t reserved60;  /* ordinal237 */
4606         uint32_t reserved61;  /* ordinal238 */
4607         uint32_t reserved62;  /* ordinal239 */
4608         uint32_t reserved63;  /* ordinal240 */
4609         uint32_t reserved64;  /* ordinal241 */
4610         uint32_t reserved65;  /* ordinal242 */
4611         uint32_t reserved66;  /* ordinal243 */
4612         uint32_t reserved67;  /* ordinal244 */
4613         uint32_t reserved68;  /* ordinal245 */
4614         uint32_t reserved69;  /* ordinal246 */
4615         uint32_t reserved70;  /* ordinal247 */
4616         uint32_t reserved71;  /* ordinal248 */
4617         uint32_t reserved72;  /* ordinal249 */
4618         uint32_t reserved73;  /* ordinal250 */
4619         uint32_t reserved74;  /* ordinal251 */
4620         uint32_t reserved75;  /* ordinal252 */
4621         uint32_t reserved76;  /* ordinal253 */
4622         uint32_t reserved77;  /* ordinal254 */
4623         uint32_t reserved78;  /* ordinal255 */
4624
4625         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4626 };
4627
4628 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4629 {
4630         int i, r;
4631
4632         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4633                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4634
4635                 if (ring->mqd_obj) {
4636                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4637                         if (unlikely(r != 0))
4638                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4639
4640                         amdgpu_bo_unpin(ring->mqd_obj);
4641                         amdgpu_bo_unreserve(ring->mqd_obj);
4642
4643                         amdgpu_bo_unref(&ring->mqd_obj);
4644                         ring->mqd_obj = NULL;
4645                 }
4646         }
4647 }
4648
4649 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4650 {
4651         int r, i, j;
4652         u32 tmp;
4653         bool use_doorbell = true;
4654         u64 hqd_gpu_addr;
4655         u64 mqd_gpu_addr;
4656         u64 eop_gpu_addr;
4657         u64 wb_gpu_addr;
4658         u32 *buf;
4659         struct vi_mqd *mqd;
4660
4661         /* init the pipes */
4662         mutex_lock(&adev->srbm_mutex);
4663         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4664                 int me = (i < 4) ? 1 : 2;
4665                 int pipe = (i < 4) ? i : (i - 4);
4666
4667                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4668                 eop_gpu_addr >>= 8;
4669
4670                 vi_srbm_select(adev, me, pipe, 0, 0);
4671
4672                 /* write the EOP addr */
4673                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4674                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4675
4676                 /* set the VMID assigned */
4677                 WREG32(mmCP_HQD_VMID, 0);
4678
4679                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4680                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4681                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4682                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4683                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4684         }
4685         vi_srbm_select(adev, 0, 0, 0, 0);
4686         mutex_unlock(&adev->srbm_mutex);
4687
4688         /* init the queues.  Just two for now. */
4689         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4690                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4691
4692                 if (ring->mqd_obj == NULL) {
4693                         r = amdgpu_bo_create(adev,
4694                                              sizeof(struct vi_mqd),
4695                                              PAGE_SIZE, true,
4696                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4697                                              NULL, &ring->mqd_obj);
4698                         if (r) {
4699                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4700                                 return r;
4701                         }
4702                 }
4703
4704                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4705                 if (unlikely(r != 0)) {
4706                         gfx_v8_0_cp_compute_fini(adev);
4707                         return r;
4708                 }
4709                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4710                                   &mqd_gpu_addr);
4711                 if (r) {
4712                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4713                         gfx_v8_0_cp_compute_fini(adev);
4714                         return r;
4715                 }
4716                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4717                 if (r) {
4718                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4719                         gfx_v8_0_cp_compute_fini(adev);
4720                         return r;
4721                 }
4722
4723                 /* init the mqd struct */
4724                 memset(buf, 0, sizeof(struct vi_mqd));
4725
4726                 mqd = (struct vi_mqd *)buf;
4727                 mqd->header = 0xC0310800;
4728                 mqd->compute_pipelinestat_enable = 0x00000001;
4729                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4730                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4731                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4732                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4733                 mqd->compute_misc_reserved = 0x00000003;
4734
4735                 mutex_lock(&adev->srbm_mutex);
4736                 vi_srbm_select(adev, ring->me,
4737                                ring->pipe,
4738                                ring->queue, 0);
4739
4740                 /* disable wptr polling */
4741                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4742                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4743                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4744
4745                 mqd->cp_hqd_eop_base_addr_lo =
4746                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4747                 mqd->cp_hqd_eop_base_addr_hi =
4748                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4749
4750                 /* enable doorbell? */
4751                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4752                 if (use_doorbell) {
4753                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4754                 } else {
4755                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4756                 }
4757                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4758                 mqd->cp_hqd_pq_doorbell_control = tmp;
4759
4760                 /* disable the queue if it's active */
4761                 mqd->cp_hqd_dequeue_request = 0;
4762                 mqd->cp_hqd_pq_rptr = 0;
4763                 mqd->cp_hqd_pq_wptr= 0;
4764                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4765                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4766                         for (j = 0; j < adev->usec_timeout; j++) {
4767                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4768                                         break;
4769                                 udelay(1);
4770                         }
4771                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4772                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4773                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4774                 }
4775
4776                 /* set the pointer to the MQD */
4777                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4778                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4779                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4780                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4781
4782                 /* set MQD vmid to 0 */
4783                 tmp = RREG32(mmCP_MQD_CONTROL);
4784                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4785                 WREG32(mmCP_MQD_CONTROL, tmp);
4786                 mqd->cp_mqd_control = tmp;
4787
4788                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4789                 hqd_gpu_addr = ring->gpu_addr >> 8;
4790                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4791                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4792                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4793                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4794
4795                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4796                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4797                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4798                                     (order_base_2(ring->ring_size / 4) - 1));
4799                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4800                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4801 #ifdef __BIG_ENDIAN
4802                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4803 #endif
4804                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4805                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4806                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4807                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4808                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4809                 mqd->cp_hqd_pq_control = tmp;
4810
4811                 /* set the wb address whether it's enabled or not */
4812                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4813                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4814                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4815                         upper_32_bits(wb_gpu_addr) & 0xffff;
4816                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4817                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4818                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4819                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4820
4821                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4822                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4823                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4824                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4825                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4826                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4827                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4828
4829                 /* enable the doorbell if requested */
4830                 if (use_doorbell) {
4831                         if ((adev->asic_type == CHIP_CARRIZO) ||
4832                             (adev->asic_type == CHIP_FIJI) ||
4833                             (adev->asic_type == CHIP_STONEY) ||
4834                             (adev->asic_type == CHIP_POLARIS11) ||
4835                             (adev->asic_type == CHIP_POLARIS10)) {
4836                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4837                                        AMDGPU_DOORBELL_KIQ << 2);
4838                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4839                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4840                         }
4841                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4842                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4843                                             DOORBELL_OFFSET, ring->doorbell_index);
4844                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4845                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4846                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4847                         mqd->cp_hqd_pq_doorbell_control = tmp;
4848
4849                 } else {
4850                         mqd->cp_hqd_pq_doorbell_control = 0;
4851                 }
4852                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4853                        mqd->cp_hqd_pq_doorbell_control);
4854
4855                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4856                 ring->wptr = 0;
4857                 mqd->cp_hqd_pq_wptr = ring->wptr;
4858                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4859                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4860
4861                 /* set the vmid for the queue */
4862                 mqd->cp_hqd_vmid = 0;
4863                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4864
4865                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4866                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4867                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4868                 mqd->cp_hqd_persistent_state = tmp;
4869                 if (adev->asic_type == CHIP_STONEY ||
4870                         adev->asic_type == CHIP_POLARIS11 ||
4871                         adev->asic_type == CHIP_POLARIS10) {
4872                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4873                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4874                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4875                 }
4876
4877                 /* activate the queue */
4878                 mqd->cp_hqd_active = 1;
4879                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4880
4881                 vi_srbm_select(adev, 0, 0, 0, 0);
4882                 mutex_unlock(&adev->srbm_mutex);
4883
4884                 amdgpu_bo_kunmap(ring->mqd_obj);
4885                 amdgpu_bo_unreserve(ring->mqd_obj);
4886         }
4887
4888         if (use_doorbell) {
4889                 tmp = RREG32(mmCP_PQ_STATUS);
4890                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4891                 WREG32(mmCP_PQ_STATUS, tmp);
4892         }
4893
4894         gfx_v8_0_cp_compute_enable(adev, true);
4895
4896         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4897                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4898
4899                 ring->ready = true;
4900                 r = amdgpu_ring_test_ring(ring);
4901                 if (r)
4902                         ring->ready = false;
4903         }
4904
4905         return 0;
4906 }
4907
4908 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4909 {
4910         int r;
4911
4912         if (!(adev->flags & AMD_IS_APU))
4913                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4914
4915         if (!adev->pp_enabled) {
4916                 if (!adev->firmware.smu_load) {
4917                         /* legacy firmware loading */
4918                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4919                         if (r)
4920                                 return r;
4921
4922                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4923                         if (r)
4924                                 return r;
4925                 } else {
4926                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4927                                                         AMDGPU_UCODE_ID_CP_CE);
4928                         if (r)
4929                                 return -EINVAL;
4930
4931                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4932                                                         AMDGPU_UCODE_ID_CP_PFP);
4933                         if (r)
4934                                 return -EINVAL;
4935
4936                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4937                                                         AMDGPU_UCODE_ID_CP_ME);
4938                         if (r)
4939                                 return -EINVAL;
4940
4941                         if (adev->asic_type == CHIP_TOPAZ) {
4942                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4943                                 if (r)
4944                                         return r;
4945                         } else {
4946                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4947                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4948                                 if (r)
4949                                         return -EINVAL;
4950                         }
4951                 }
4952         }
4953
4954         r = gfx_v8_0_cp_gfx_resume(adev);
4955         if (r)
4956                 return r;
4957
4958         r = gfx_v8_0_cp_compute_resume(adev);
4959         if (r)
4960                 return r;
4961
4962         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4963
4964         return 0;
4965 }
4966
4967 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4968 {
4969         gfx_v8_0_cp_gfx_enable(adev, enable);
4970         gfx_v8_0_cp_compute_enable(adev, enable);
4971 }
4972
/*
 * Hardware init for the gfx block: program the golden registers, run the
 * GPU init, then resume the RLC followed by the CP.
 *
 * @handle: the amdgpu_device, passed as an opaque pointer
 *
 * Returns 0 on success or the error from the RLC/CP resume step that failed.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4992
4993 static int gfx_v8_0_hw_fini(void *handle)
4994 {
4995         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4996
4997         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4998         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4999         gfx_v8_0_cp_enable(adev, false);
5000         gfx_v8_0_rlc_stop(adev);
5001         gfx_v8_0_cp_compute_fini(adev);
5002
5003         amdgpu_set_powergating_state(adev,
5004                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5005
5006         return 0;
5007 }
5008
/* Suspend is a plain hardware teardown; @handle is the amdgpu_device. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
5015
/* Resume is a plain hardware init; @handle is the amdgpu_device. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
5022
5023 static bool gfx_v8_0_is_idle(void *handle)
5024 {
5025         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5026
5027         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5028                 return false;
5029         else
5030                 return true;
5031 }
5032
5033 static int gfx_v8_0_wait_for_idle(void *handle)
5034 {
5035         unsigned i;
5036         u32 tmp;
5037         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5038
5039         for (i = 0; i < adev->usec_timeout; i++) {
5040                 /* read MC_STATUS */
5041                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
5042
5043                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
5044                         return 0;
5045                 udelay(1);
5046         }
5047         return -ETIMEDOUT;
5048 }
5049
5050 static int gfx_v8_0_soft_reset(void *handle)
5051 {
5052         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5053         u32 tmp;
5054         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5055
5056         /* GRBM_STATUS */
5057         tmp = RREG32(mmGRBM_STATUS);
5058         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5059                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5060                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5061                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5062                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5063                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
5064                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5065                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5066                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5067                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5068         }
5069
5070         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5071                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5072                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5073                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5074                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5075         }
5076
5077         /* GRBM_STATUS2 */
5078         tmp = RREG32(mmGRBM_STATUS2);
5079         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5080                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5081                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5082
5083         /* SRBM_STATUS */
5084         tmp = RREG32(mmSRBM_STATUS);
5085         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5086                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5087                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5088
5089         if (grbm_soft_reset || srbm_soft_reset) {
5090                 /* stop the rlc */
5091                 gfx_v8_0_rlc_stop(adev);
5092
5093                 /* Disable GFX parsing/prefetching */
5094                 gfx_v8_0_cp_gfx_enable(adev, false);
5095
5096                 /* Disable MEC parsing/prefetching */
5097                 gfx_v8_0_cp_compute_enable(adev, false);
5098
5099                 if (grbm_soft_reset || srbm_soft_reset) {
5100                         tmp = RREG32(mmGMCON_DEBUG);
5101                         tmp = REG_SET_FIELD(tmp,
5102                                             GMCON_DEBUG, GFX_STALL, 1);
5103                         tmp = REG_SET_FIELD(tmp,
5104                                             GMCON_DEBUG, GFX_CLEAR, 1);
5105                         WREG32(mmGMCON_DEBUG, tmp);
5106
5107                         udelay(50);
5108                 }
5109
5110                 if (grbm_soft_reset) {
5111                         tmp = RREG32(mmGRBM_SOFT_RESET);
5112                         tmp |= grbm_soft_reset;
5113                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5114                         WREG32(mmGRBM_SOFT_RESET, tmp);
5115                         tmp = RREG32(mmGRBM_SOFT_RESET);
5116
5117                         udelay(50);
5118
5119                         tmp &= ~grbm_soft_reset;
5120                         WREG32(mmGRBM_SOFT_RESET, tmp);
5121                         tmp = RREG32(mmGRBM_SOFT_RESET);
5122                 }
5123
5124                 if (srbm_soft_reset) {
5125                         tmp = RREG32(mmSRBM_SOFT_RESET);
5126                         tmp |= srbm_soft_reset;
5127                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5128                         WREG32(mmSRBM_SOFT_RESET, tmp);
5129                         tmp = RREG32(mmSRBM_SOFT_RESET);
5130
5131                         udelay(50);
5132
5133                         tmp &= ~srbm_soft_reset;
5134                         WREG32(mmSRBM_SOFT_RESET, tmp);
5135                         tmp = RREG32(mmSRBM_SOFT_RESET);
5136                 }
5137
5138                 if (grbm_soft_reset || srbm_soft_reset) {
5139                         tmp = RREG32(mmGMCON_DEBUG);
5140                         tmp = REG_SET_FIELD(tmp,
5141                                             GMCON_DEBUG, GFX_STALL, 0);
5142                         tmp = REG_SET_FIELD(tmp,
5143                                             GMCON_DEBUG, GFX_CLEAR, 0);
5144                         WREG32(mmGMCON_DEBUG, tmp);
5145                 }
5146
5147                 /* Wait a little for things to settle down */
5148                 udelay(50);
5149         }
5150         return 0;
5151 }
5152
5153 /**
5154  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5155  *
5156  * @adev: amdgpu_device pointer
5157  *
5158  * Fetches a GPU clock counter snapshot.
5159  * Returns the 64 bit clock counter snapshot.
5160  */
5161 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5162 {
5163         uint64_t clock;
5164
5165         mutex_lock(&adev->gfx.gpu_clock_mutex);
5166         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5167         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5168                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5169         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5170         return clock;
5171 }
5172
5173 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5174                                           uint32_t vmid,
5175                                           uint32_t gds_base, uint32_t gds_size,
5176                                           uint32_t gws_base, uint32_t gws_size,
5177                                           uint32_t oa_base, uint32_t oa_size)
5178 {
5179         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5180         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5181
5182         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5183         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5184
5185         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5186         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5187
5188         /* GDS Base */
5189         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5190         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5191                                 WRITE_DATA_DST_SEL(0)));
5192         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5193         amdgpu_ring_write(ring, 0);
5194         amdgpu_ring_write(ring, gds_base);
5195
5196         /* GDS Size */
5197         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5198         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5199                                 WRITE_DATA_DST_SEL(0)));
5200         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5201         amdgpu_ring_write(ring, 0);
5202         amdgpu_ring_write(ring, gds_size);
5203
5204         /* GWS */
5205         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5206         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5207                                 WRITE_DATA_DST_SEL(0)));
5208         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5209         amdgpu_ring_write(ring, 0);
5210         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5211
5212         /* OA */
5213         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5214         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5215                                 WRITE_DATA_DST_SEL(0)));
5216         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5217         amdgpu_ring_write(ring, 0);
5218         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5219 }
5220
5221 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5222         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5223         .select_se_sh = &gfx_v8_0_select_se_sh,
5224 };
5225
5226 static int gfx_v8_0_early_init(void *handle)
5227 {
5228         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5229
5230         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5231         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5232         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5233         gfx_v8_0_set_ring_funcs(adev);
5234         gfx_v8_0_set_irq_funcs(adev);
5235         gfx_v8_0_set_gds_init(adev);
5236         gfx_v8_0_set_rlc_funcs(adev);
5237
5238         return 0;
5239 }
5240
5241 static int gfx_v8_0_late_init(void *handle)
5242 {
5243         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5244         int r;
5245
5246         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5247         if (r)
5248                 return r;
5249
5250         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5251         if (r)
5252                 return r;
5253
5254         /* requires IBs so do in late init after IB pool is initialized */
5255         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5256         if (r)
5257                 return r;
5258
5259         amdgpu_set_powergating_state(adev,
5260                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5261
5262         return 0;
5263 }
5264
5265 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5266                                                        bool enable)
5267 {
5268         uint32_t data, temp;
5269
5270         if (adev->asic_type == CHIP_POLARIS11)
5271                 /* Send msg to SMU via Powerplay */
5272                 amdgpu_set_powergating_state(adev,
5273                                              AMD_IP_BLOCK_TYPE_SMC,
5274                                              enable ?
5275                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5276
5277         temp = data = RREG32(mmRLC_PG_CNTL);
5278         /* Enable static MGPG */
5279         if (enable)
5280                 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5281         else
5282                 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5283
5284         if (temp != data)
5285                 WREG32(mmRLC_PG_CNTL, data);
5286 }
5287
5288 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5289                                                         bool enable)
5290 {
5291         uint32_t data, temp;
5292
5293         temp = data = RREG32(mmRLC_PG_CNTL);
5294         /* Enable dynamic MGPG */
5295         if (enable)
5296                 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5297         else
5298                 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5299
5300         if (temp != data)
5301                 WREG32(mmRLC_PG_CNTL, data);
5302 }
5303
5304 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5305                 bool enable)
5306 {
5307         uint32_t data, temp;
5308
5309         temp = data = RREG32(mmRLC_PG_CNTL);
5310         /* Enable quick PG */
5311         if (enable)
5312                 data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5313         else
5314                 data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5315
5316         if (temp != data)
5317                 WREG32(mmRLC_PG_CNTL, data);
5318 }
5319
5320 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5321                                           bool enable)
5322 {
5323         u32 data, orig;
5324
5325         orig = data = RREG32(mmRLC_PG_CNTL);
5326
5327         if (enable)
5328                 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5329         else
5330                 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5331
5332         if (orig != data)
5333                 WREG32(mmRLC_PG_CNTL, data);
5334 }
5335
5336 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5337                                                 bool enable)
5338 {
5339         u32 data, orig;
5340
5341         orig = data = RREG32(mmRLC_PG_CNTL);
5342
5343         if (enable)
5344                 data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5345         else
5346                 data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5347
5348         if (orig != data)
5349                 WREG32(mmRLC_PG_CNTL, data);
5350
5351         /* Read any GFX register to wake up GFX. */
5352         if (!enable)
5353                 data = RREG32(mmDB_RENDER_CONTROL);
5354 }
5355
5356 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5357                                           bool enable)
5358 {
5359         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5360                 cz_enable_gfx_cg_power_gating(adev, true);
5361                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5362                         cz_enable_gfx_pipeline_power_gating(adev, true);
5363         } else {
5364                 cz_enable_gfx_cg_power_gating(adev, false);
5365                 cz_enable_gfx_pipeline_power_gating(adev, false);
5366         }
5367 }
5368
5369 static int gfx_v8_0_set_powergating_state(void *handle,
5370                                           enum amd_powergating_state state)
5371 {
5372         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5373         bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5374
5375         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5376                 return 0;
5377
5378         switch (adev->asic_type) {
5379         case CHIP_CARRIZO:
5380         case CHIP_STONEY:
5381                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5382                         cz_update_gfx_cg_power_gating(adev, enable);
5383
5384                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5385                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5386                 else
5387                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5388
5389                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5390                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5391                 else
5392                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5393                 break;
5394         case CHIP_POLARIS11:
5395                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5396                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5397                 else
5398                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5399
5400                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5401                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5402                 else
5403                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5404
5405                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5406                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5407                 else
5408                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5409                 break;
5410         default:
5411                 break;
5412         }
5413
5414         return 0;
5415 }
5416
5417 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5418                                      uint32_t reg_addr, uint32_t cmd)
5419 {
5420         uint32_t data;
5421
5422         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5423
5424         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5425         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5426
5427         data = RREG32(mmRLC_SERDES_WR_CTRL);
5428         if (adev->asic_type == CHIP_STONEY)
5429                         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5430                         RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5431                         RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5432                         RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5433                         RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5434                         RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5435                         RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5436                         RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5437                         RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5438         else
5439                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5440                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5441                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5442                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5443                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5444                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5445                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5446                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5447                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5448                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5449                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5450         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5451                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5452                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5453                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5454
5455         WREG32(mmRLC_SERDES_WR_CTRL, data);
5456 }
5457
/* Message codes shifted into the MESSAGE field of RLC_GPR_REG2 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0

/* RLC_GPR_REG2 fields: REQ is bit 0, MESSAGE occupies bits 4:1 */
#define RLC_GPR_REG2__REQ_MASK           0x00000001
#define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
5464
5465 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5466 {
5467         u32 data = 0;
5468         unsigned i;
5469
5470         data = RREG32(mmRLC_CNTL);
5471         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5472                 return;
5473
5474         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5475             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5476                                AMD_PG_SUPPORT_GFX_DMG))) {
5477                 data |= RLC_GPR_REG2__REQ_MASK;
5478                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5479                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5480                 WREG32(mmRLC_GPR_REG2, data);
5481
5482                 for (i = 0; i < adev->usec_timeout; i++) {
5483                         if ((RREG32(mmRLC_GPM_STAT) &
5484                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5485                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5486                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5487                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5488                                 break;
5489                         udelay(1);
5490                 }
5491
5492                 for (i = 0; i < adev->usec_timeout; i++) {
5493                         if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5494                                 break;
5495                         udelay(1);
5496                 }
5497                 adev->gfx.rlc.in_safe_mode = true;
5498         }
5499 }
5500
5501 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5502 {
5503         u32 data;
5504         unsigned i;
5505
5506         data = RREG32(mmRLC_CNTL);
5507         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5508                 return;
5509
5510         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5511             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5512                                AMD_PG_SUPPORT_GFX_DMG))) {
5513                 data |= RLC_GPR_REG2__REQ_MASK;
5514                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5515                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5516                 WREG32(mmRLC_GPR_REG2, data);
5517                 adev->gfx.rlc.in_safe_mode = false;
5518         }
5519
5520         for (i = 0; i < adev->usec_timeout; i++) {
5521                 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5522                         break;
5523                 udelay(1);
5524         }
5525 }
5526
5527 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5528 {
5529         u32 data;
5530         unsigned i;
5531
5532         data = RREG32(mmRLC_CNTL);
5533         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5534                 return;
5535
5536         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5537                 data |= RLC_SAFE_MODE__CMD_MASK;
5538                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5539                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5540                 WREG32(mmRLC_SAFE_MODE, data);
5541
5542                 for (i = 0; i < adev->usec_timeout; i++) {
5543                         if ((RREG32(mmRLC_GPM_STAT) &
5544                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5545                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5546                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5547                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5548                                 break;
5549                         udelay(1);
5550                 }
5551
5552                 for (i = 0; i < adev->usec_timeout; i++) {
5553                         if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5554                                 break;
5555                         udelay(1);
5556                 }
5557                 adev->gfx.rlc.in_safe_mode = true;
5558         }
5559 }
5560
5561 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5562 {
5563         u32 data = 0;
5564         unsigned i;
5565
5566         data = RREG32(mmRLC_CNTL);
5567         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5568                 return;
5569
5570         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5571                 if (adev->gfx.rlc.in_safe_mode) {
5572                         data |= RLC_SAFE_MODE__CMD_MASK;
5573                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5574                         WREG32(mmRLC_SAFE_MODE, data);
5575                         adev->gfx.rlc.in_safe_mode = false;
5576                 }
5577         }
5578
5579         for (i = 0; i < adev->usec_timeout; i++) {
5580                 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5581                         break;
5582                 udelay(1);
5583         }
5584 }
5585
5586 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5587 {
5588         adev->gfx.rlc.in_safe_mode = true;
5589 }
5590
5591 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5592 {
5593         adev->gfx.rlc.in_safe_mode = false;
5594 }
5595
5596 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5597         .enter_safe_mode = cz_enter_rlc_safe_mode,
5598         .exit_safe_mode = cz_exit_rlc_safe_mode
5599 };
5600
5601 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5602         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5603         .exit_safe_mode = iceland_exit_rlc_safe_mode
5604 };
5605
5606 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5607         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5608         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5609 };
5610
/*
 * Program medium grain clock gating (MGCG) together with the memory light
 * sleep and CGTS settings that depend on it.
 *
 * @adev:   device to program
 * @enable: requested state.  The enable sequence only runs when
 *          AMD_CG_SUPPORT_GFX_MGCG is also set in adev->cg_flags; in every
 *          other case the disable sequence runs.
 *
 * The whole sequence is bracketed by the RLC enter_safe_mode() and
 * exit_safe_mode() callbacks.  Registers are only written when the computed
 * value differs from what was read back (or, in the disable path for the
 * two MEM_SLP_CNTL registers, when the light-sleep bit is currently set).
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, data;

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        /* It is disabled by HW by default */
        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
                /* light sleep is only touched when MGLS is supported */
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
                                /* 1 - RLC memory Light sleep */
                                temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
                                data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                                if (temp != data)
                                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                        }

                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
                                /* 2 - CP memory Light sleep */
                                temp = data = RREG32(mmCP_MEM_SLP_CNTL);
                                data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                                if (temp != data)
                                        WREG32(mmCP_MEM_SLP_CNTL, data);
                        }
                }

                /* 3 - RLC_CGTT_MGCG_OVERRIDE */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                /* APUs leave the GRBM override bit untouched; other parts clear it too */
                if (adev->flags & AMD_IS_APU)
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
                else
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 5 - clear mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
                        /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
                        temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                        data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
                        /* the LS override is only lifted when both MGLS and CGTS_LS are supported */
                        if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
                            (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
                        data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
                        if (temp != data)
                                WREG32(mmCGTS_SM_CTRL_REG, data);
                }
                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        } else {
                /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 2 - disable MGLS in RLC */
                data = RREG32(mmRLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                }

                /* 3 - disable MGLS in CP */
                data = RREG32(mmCP_MEM_SLP_CNTL);
                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                        WREG32(mmCP_MEM_SLP_CNTL, data);
                }

                /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
                                CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
                if (temp != data)
                        WREG32(mmCGTS_SM_CTRL_REG, data);

                /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 6 - set mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        }

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5723
/*
 * Program coarse grain clock gating (CGCG) and coarse grain light sleep
 * (CGLS).
 *
 * @adev:   device to program
 * @enable: requested state.  The enable sequence only runs when
 *          AMD_CG_SUPPORT_GFX_CGCG is also set in adev->cg_flags; in every
 *          other case the disable sequence runs.
 *
 * RLC_CGCG_CGLS_CTRL is sampled before the RLC enter_safe_mode() callback is
 * invoked; the rest of the sequence runs between enter_safe_mode() and
 * exit_safe_mode().  Registers are only written back when the computed
 * value differs from the value that was read.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, temp1, data, data1;

        /* temp keeps the value as read, data accumulates the new CGCG/CGLS enables */
        temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
                /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
                 * Cmp_busy/GFX_Idle interrupts
                 */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);

                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 3 - clear cgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 4 - write cmd to set CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

                /* 5 - enable cgcg */
                data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
                        /* enable cgls*/
                        data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

                        temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                        data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

                        if (temp1 != data1)
                                WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
                } else {
                        /* CGLS not supported: make sure its enable bit ends up clear */
                        data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
                }

                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
        } else {
                /* disable cntx_empty_int_enable & GFX Idle interrupt */
                gfx_v8_0_enable_gui_idle_interrupt(adev, false);

                /* TEST CGCG */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* read gfx register to wake up cgcg */
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Set CGCG Override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to Clear CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

                /* disable cgcg, cgls should be disabled too. */
                data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                          RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
        }

        /* final SERDES idle wait, common to both paths, before leaving safe mode */
        gfx_v8_0_wait_for_rlc_serdes(adev);

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
/*
 * Apply medium and coarse grain clock gating in the order each direction
 * requires.  Always returns 0.
 *
 * @adev:   device to program
 * @enable: true to enable gating, false to disable it
 */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
                                            bool enable)
{
        if (!enable) {
                /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
                 * ===  CGCG + CGLS ===
                 */
                gfx_v8_0_update_coarse_grain_clock_gating(adev, false);
                gfx_v8_0_update_medium_grain_clock_gating(adev, false);
                return 0;
        }

        /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
         * ===  MGCG + MGLS + TS(CG/LS) ===
         */
        gfx_v8_0_update_medium_grain_clock_gating(adev, true);
        gfx_v8_0_update_coarse_grain_clock_gating(adev, true);

        return 0;
}
5832
5833 static int gfx_v8_0_set_clockgating_state(void *handle,
5834                                           enum amd_clockgating_state state)
5835 {
5836         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5837
5838         switch (adev->asic_type) {
5839         case CHIP_FIJI:
5840         case CHIP_CARRIZO:
5841         case CHIP_STONEY:
5842                 gfx_v8_0_update_gfx_clock_gating(adev,
5843                                                  state == AMD_CG_STATE_GATE ? true : false);
5844                 break;
5845         default:
5846                 break;
5847         }
5848         return 0;
5849 }
5850
5851 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5852 {
5853         u32 rptr;
5854
5855         rptr = ring->adev->wb.wb[ring->rptr_offs];
5856
5857         return rptr;
5858 }
5859
5860 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5861 {
5862         struct amdgpu_device *adev = ring->adev;
5863         u32 wptr;
5864
5865         if (ring->use_doorbell)
5866                 /* XXX check if swapping is necessary on BE */
5867                 wptr = ring->adev->wb.wb[ring->wptr_offs];
5868         else
5869                 wptr = RREG32(mmCP_RB0_WPTR);
5870
5871         return wptr;
5872 }
5873
5874 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5875 {
5876         struct amdgpu_device *adev = ring->adev;
5877
5878         if (ring->use_doorbell) {
5879                 /* XXX check if swapping is necessary on BE */
5880                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5881                 WDOORBELL32(ring->doorbell_index, ring->wptr);
5882         } else {
5883                 WREG32(mmCP_RB0_WPTR, ring->wptr);
5884                 (void)RREG32(mmCP_RB0_WPTR);
5885         }
5886 }
5887
5888 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5889 {
5890         u32 ref_and_mask, reg_mem_engine;
5891
5892         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5893                 switch (ring->me) {
5894                 case 1:
5895                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5896                         break;
5897                 case 2:
5898                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5899                         break;
5900                 default:
5901                         return;
5902                 }
5903                 reg_mem_engine = 0;
5904         } else {
5905                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5906                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5907         }
5908
5909         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5910         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5911                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
5912                                  reg_mem_engine));
5913         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5914         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5915         amdgpu_ring_write(ring, ref_and_mask);
5916         amdgpu_ring_write(ring, ref_and_mask);
5917         amdgpu_ring_write(ring, 0x20); /* poll interval */
5918 }
5919
5920 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5921 {
5922         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5923         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5924                                  WRITE_DATA_DST_SEL(0) |
5925                                  WR_CONFIRM));
5926         amdgpu_ring_write(ring, mmHDP_DEBUG0);
5927         amdgpu_ring_write(ring, 0);
5928         amdgpu_ring_write(ring, 1);
5929
5930 }
5931
5932 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5933                                       struct amdgpu_ib *ib,
5934                                       unsigned vm_id, bool ctx_switch)
5935 {
5936         u32 header, control = 0;
5937
5938         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5939         if (ctx_switch) {
5940                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5941                 amdgpu_ring_write(ring, 0);
5942         }
5943
5944         if (ib->flags & AMDGPU_IB_FLAG_CE)
5945                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5946         else
5947                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5948
5949         control |= ib->length_dw | (vm_id << 24);
5950
5951         amdgpu_ring_write(ring, header);
5952         amdgpu_ring_write(ring,
5953 #ifdef __BIG_ENDIAN
5954                           (2 << 0) |
5955 #endif
5956                           (ib->gpu_addr & 0xFFFFFFFC));
5957         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5958         amdgpu_ring_write(ring, control);
5959 }
5960
5961 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5962                                           struct amdgpu_ib *ib,
5963                                           unsigned vm_id, bool ctx_switch)
5964 {
5965         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
5966
5967         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5968         amdgpu_ring_write(ring,
5969 #ifdef __BIG_ENDIAN
5970                                           (2 << 0) |
5971 #endif
5972                                           (ib->gpu_addr & 0xFFFFFFFC));
5973         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5974         amdgpu_ring_write(ring, control);
5975 }
5976
5977 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5978                                          u64 seq, unsigned flags)
5979 {
5980         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5981         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5982
5983         /* EVENT_WRITE_EOP - flush caches, send int */
5984         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5985         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5986                                  EOP_TC_ACTION_EN |
5987                                  EOP_TC_WB_ACTION_EN |
5988                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5989                                  EVENT_INDEX(5)));
5990         amdgpu_ring_write(ring, addr & 0xfffffffc);
5991         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
5992                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5993         amdgpu_ring_write(ring, lower_32_bits(seq));
5994         amdgpu_ring_write(ring, upper_32_bits(seq));
5995
5996 }
5997
5998 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5999 {
6000         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6001         uint32_t seq = ring->fence_drv.sync_seq;
6002         uint64_t addr = ring->fence_drv.gpu_addr;
6003
6004         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6005         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6006                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6007                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6008         amdgpu_ring_write(ring, addr & 0xfffffffc);
6009         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6010         amdgpu_ring_write(ring, seq);
6011         amdgpu_ring_write(ring, 0xffffffff);
6012         amdgpu_ring_write(ring, 4); /* poll interval */
6013
6014         if (usepfp) {
6015                 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
6016                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6017                 amdgpu_ring_write(ring, 0);
6018                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6019                 amdgpu_ring_write(ring, 0);
6020         }
6021 }
6022
6023 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6024                                         unsigned vm_id, uint64_t pd_addr)
6025 {
6026         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6027
6028         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6029         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6030                                  WRITE_DATA_DST_SEL(0)) |
6031                                  WR_CONFIRM);
6032         if (vm_id < 8) {
6033                 amdgpu_ring_write(ring,
6034                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6035         } else {
6036                 amdgpu_ring_write(ring,
6037                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6038         }
6039         amdgpu_ring_write(ring, 0);
6040         amdgpu_ring_write(ring, pd_addr >> 12);
6041
6042         /* bits 0-15 are the VM contexts0-15 */
6043         /* invalidate the cache */
6044         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6045         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6046                                  WRITE_DATA_DST_SEL(0)));
6047         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6048         amdgpu_ring_write(ring, 0);
6049         amdgpu_ring_write(ring, 1 << vm_id);
6050
6051         /* wait for the invalidate to complete */
6052         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6053         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6054                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6055                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6056         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6057         amdgpu_ring_write(ring, 0);
6058         amdgpu_ring_write(ring, 0); /* ref */
6059         amdgpu_ring_write(ring, 0); /* mask */
6060         amdgpu_ring_write(ring, 0x20); /* poll interval */
6061
6062         /* compute doesn't have PFP */
6063         if (usepfp) {
6064                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6065                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6066                 amdgpu_ring_write(ring, 0x0);
6067                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6068                 amdgpu_ring_write(ring, 0);
6069                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6070                 amdgpu_ring_write(ring, 0);
6071         }
6072 }
6073
6074 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
6075 {
6076         return ring->adev->wb.wb[ring->rptr_offs];
6077 }
6078
6079 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6080 {
6081         return ring->adev->wb.wb[ring->wptr_offs];
6082 }
6083
6084 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6085 {
6086         struct amdgpu_device *adev = ring->adev;
6087
6088         /* XXX check if swapping is necessary on BE */
6089         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6090         WDOORBELL32(ring->doorbell_index, ring->wptr);
6091 }
6092
6093 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6094                                              u64 addr, u64 seq,
6095                                              unsigned flags)
6096 {
6097         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6098         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6099
6100         /* RELEASE_MEM - flush caches, send int */
6101         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6102         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6103                                  EOP_TC_ACTION_EN |
6104                                  EOP_TC_WB_ACTION_EN |
6105                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6106                                  EVENT_INDEX(5)));
6107         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6108         amdgpu_ring_write(ring, addr & 0xfffffffc);
6109         amdgpu_ring_write(ring, upper_32_bits(addr));
6110         amdgpu_ring_write(ring, lower_32_bits(seq));
6111         amdgpu_ring_write(ring, upper_32_bits(seq));
6112 }
6113
6114 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6115                                                  enum amdgpu_interrupt_state state)
6116 {
6117         u32 cp_int_cntl;
6118
6119         switch (state) {
6120         case AMDGPU_IRQ_STATE_DISABLE:
6121                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6122                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6123                                             TIME_STAMP_INT_ENABLE, 0);
6124                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6125                 break;
6126         case AMDGPU_IRQ_STATE_ENABLE:
6127                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6128                 cp_int_cntl =
6129                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6130                                       TIME_STAMP_INT_ENABLE, 1);
6131                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6132                 break;
6133         default:
6134                 break;
6135         }
6136 }
6137
6138 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6139                                                      int me, int pipe,
6140                                                      enum amdgpu_interrupt_state state)
6141 {
6142         u32 mec_int_cntl, mec_int_cntl_reg;
6143
6144         /*
6145          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6146          * handles the setting of interrupts for this specific pipe. All other
6147          * pipes' interrupts are set by amdkfd.
6148          */
6149
6150         if (me == 1) {
6151                 switch (pipe) {
6152                 case 0:
6153                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6154                         break;
6155                 default:
6156                         DRM_DEBUG("invalid pipe %d\n", pipe);
6157                         return;
6158                 }
6159         } else {
6160                 DRM_DEBUG("invalid me %d\n", me);
6161                 return;
6162         }
6163
6164         switch (state) {
6165         case AMDGPU_IRQ_STATE_DISABLE:
6166                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6167                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6168                                              TIME_STAMP_INT_ENABLE, 0);
6169                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6170                 break;
6171         case AMDGPU_IRQ_STATE_ENABLE:
6172                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6173                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6174                                              TIME_STAMP_INT_ENABLE, 1);
6175                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6176                 break;
6177         default:
6178                 break;
6179         }
6180 }
6181
6182 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6183                                              struct amdgpu_irq_src *source,
6184                                              unsigned type,
6185                                              enum amdgpu_interrupt_state state)
6186 {
6187         u32 cp_int_cntl;
6188
6189         switch (state) {
6190         case AMDGPU_IRQ_STATE_DISABLE:
6191                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6192                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6193                                             PRIV_REG_INT_ENABLE, 0);
6194                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6195                 break;
6196         case AMDGPU_IRQ_STATE_ENABLE:
6197                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6198                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6199                                             PRIV_REG_INT_ENABLE, 1);
6200                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6201                 break;
6202         default:
6203                 break;
6204         }
6205
6206         return 0;
6207 }
6208
6209 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6210                                               struct amdgpu_irq_src *source,
6211                                               unsigned type,
6212                                               enum amdgpu_interrupt_state state)
6213 {
6214         u32 cp_int_cntl;
6215
6216         switch (state) {
6217         case AMDGPU_IRQ_STATE_DISABLE:
6218                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6219                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6220                                             PRIV_INSTR_INT_ENABLE, 0);
6221                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6222                 break;
6223         case AMDGPU_IRQ_STATE_ENABLE:
6224                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6225                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6226                                             PRIV_INSTR_INT_ENABLE, 1);
6227                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6228                 break;
6229         default:
6230                 break;
6231         }
6232
6233         return 0;
6234 }
6235
6236 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6237                                             struct amdgpu_irq_src *src,
6238                                             unsigned type,
6239                                             enum amdgpu_interrupt_state state)
6240 {
6241         switch (type) {
6242         case AMDGPU_CP_IRQ_GFX_EOP:
6243                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6244                 break;
6245         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6246                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6247                 break;
6248         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6249                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6250                 break;
6251         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6252                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6253                 break;
6254         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6255                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6256                 break;
6257         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6258                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6259                 break;
6260         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6261                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6262                 break;
6263         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6264                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6265                 break;
6266         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6267                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6268                 break;
6269         default:
6270                 break;
6271         }
6272         return 0;
6273 }
6274
6275 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6276                             struct amdgpu_irq_src *source,
6277                             struct amdgpu_iv_entry *entry)
6278 {
6279         int i;
6280         u8 me_id, pipe_id, queue_id;
6281         struct amdgpu_ring *ring;
6282
6283         DRM_DEBUG("IH: CP EOP\n");
6284         me_id = (entry->ring_id & 0x0c) >> 2;
6285         pipe_id = (entry->ring_id & 0x03) >> 0;
6286         queue_id = (entry->ring_id & 0x70) >> 4;
6287
6288         switch (me_id) {
6289         case 0:
6290                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6291                 break;
6292         case 1:
6293         case 2:
6294                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6295                         ring = &adev->gfx.compute_ring[i];
6296                         /* Per-queue interrupt is supported for MEC starting from VI.
6297                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6298                           */
6299                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6300                                 amdgpu_fence_process(ring);
6301                 }
6302                 break;
6303         }
6304         return 0;
6305 }
6306
6307 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6308                                  struct amdgpu_irq_src *source,
6309                                  struct amdgpu_iv_entry *entry)
6310 {
6311         DRM_ERROR("Illegal register access in command stream\n");
6312         schedule_work(&adev->reset_work);
6313         return 0;
6314 }
6315
6316 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6317                                   struct amdgpu_irq_src *source,
6318                                   struct amdgpu_iv_entry *entry)
6319 {
6320         DRM_ERROR("Illegal instruction in command stream\n");
6321         schedule_work(&adev->reset_work);
6322         return 0;
6323 }
6324
6325 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6326         .name = "gfx_v8_0",
6327         .early_init = gfx_v8_0_early_init,
6328         .late_init = gfx_v8_0_late_init,
6329         .sw_init = gfx_v8_0_sw_init,
6330         .sw_fini = gfx_v8_0_sw_fini,
6331         .hw_init = gfx_v8_0_hw_init,
6332         .hw_fini = gfx_v8_0_hw_fini,
6333         .suspend = gfx_v8_0_suspend,
6334         .resume = gfx_v8_0_resume,
6335         .is_idle = gfx_v8_0_is_idle,
6336         .wait_for_idle = gfx_v8_0_wait_for_idle,
6337         .soft_reset = gfx_v8_0_soft_reset,
6338         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6339         .set_powergating_state = gfx_v8_0_set_powergating_state,
6340 };
6341
6342 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6343         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6344         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6345         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6346         .parse_cs = NULL,
6347         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6348         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6349         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6350         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6351         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6352         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6353         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6354         .test_ring = gfx_v8_0_ring_test_ring,
6355         .test_ib = gfx_v8_0_ring_test_ib,
6356         .insert_nop = amdgpu_ring_insert_nop,
6357         .pad_ib = amdgpu_ring_generic_pad_ib,
6358 };
6359
6360 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6361         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6362         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6363         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6364         .parse_cs = NULL,
6365         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6366         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6367         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6368         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6369         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6370         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6371         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6372         .test_ring = gfx_v8_0_ring_test_ring,
6373         .test_ib = gfx_v8_0_ring_test_ib,
6374         .insert_nop = amdgpu_ring_insert_nop,
6375         .pad_ib = amdgpu_ring_generic_pad_ib,
6376 };
6377
6378 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6379 {
6380         int i;
6381
6382         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6383                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6384
6385         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6386                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6387 }
6388
6389 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6390         .set = gfx_v8_0_set_eop_interrupt_state,
6391         .process = gfx_v8_0_eop_irq,
6392 };
6393
6394 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6395         .set = gfx_v8_0_set_priv_reg_fault_state,
6396         .process = gfx_v8_0_priv_reg_irq,
6397 };
6398
6399 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6400         .set = gfx_v8_0_set_priv_inst_fault_state,
6401         .process = gfx_v8_0_priv_inst_irq,
6402 };
6403
6404 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6405 {
6406         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6407         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6408
6409         adev->gfx.priv_reg_irq.num_types = 1;
6410         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6411
6412         adev->gfx.priv_inst_irq.num_types = 1;
6413         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6414 }
6415
6416 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6417 {
6418         switch (adev->asic_type) {
6419         case CHIP_TOPAZ:
6420                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6421                 break;
6422         case CHIP_STONEY:
6423         case CHIP_CARRIZO:
6424                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6425                 break;
6426         default:
6427                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6428                 break;
6429         }
6430 }
6431
6432 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6433 {
6434         /* init asci gds info */
6435         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6436         adev->gds.gws.total_size = 64;
6437         adev->gds.oa.total_size = 16;
6438
6439         if (adev->gds.mem.total_size == 64 * 1024) {
6440                 adev->gds.mem.gfx_partition_size = 4096;
6441                 adev->gds.mem.cs_partition_size = 4096;
6442
6443                 adev->gds.gws.gfx_partition_size = 4;
6444                 adev->gds.gws.cs_partition_size = 4;
6445
6446                 adev->gds.oa.gfx_partition_size = 4;
6447                 adev->gds.oa.cs_partition_size = 1;
6448         } else {
6449                 adev->gds.mem.gfx_partition_size = 1024;
6450                 adev->gds.mem.cs_partition_size = 1024;
6451
6452                 adev->gds.gws.gfx_partition_size = 16;
6453                 adev->gds.gws.cs_partition_size = 16;
6454
6455                 adev->gds.oa.gfx_partition_size = 4;
6456                 adev->gds.oa.cs_partition_size = 4;
6457         }
6458 }
6459
6460 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6461                                                  u32 bitmap)
6462 {
6463         u32 data;
6464
6465         if (!bitmap)
6466                 return;
6467
6468         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6469         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6470
6471         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6472 }
6473
6474 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6475 {
6476         u32 data, mask;
6477
6478         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6479         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6480
6481         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6482         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6483
6484         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6485
6486         return (~data) & mask;
6487 }
6488
6489 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6490 {
6491         int i, j, k, counter, active_cu_number = 0;
6492         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6493         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6494         unsigned disable_masks[4 * 2];
6495
6496         memset(cu_info, 0, sizeof(*cu_info));
6497
6498         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6499
6500         mutex_lock(&adev->grbm_idx_mutex);
6501         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6502                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6503                         mask = 1;
6504                         ao_bitmap = 0;
6505                         counter = 0;
6506                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6507                         if (i < 4 && j < 2)
6508                                 gfx_v8_0_set_user_cu_inactive_bitmap(
6509                                         adev, disable_masks[i * 2 + j]);
6510                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6511                         cu_info->bitmap[i][j] = bitmap;
6512
6513                         for (k = 0; k < 16; k ++) {
6514                                 if (bitmap & mask) {
6515                                         if (counter < 2)
6516                                                 ao_bitmap |= mask;
6517                                         counter ++;
6518                                 }
6519                                 mask <<= 1;
6520                         }
6521                         active_cu_number += counter;
6522                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6523                 }
6524         }
6525         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6526         mutex_unlock(&adev->grbm_idx_mutex);
6527
6528         cu_info->number = active_cu_number;
6529         cu_info->ao_cu_mask = ao_cu_mask;
6530 }