drm/amdgpu: drop unnecessary cancel_delayed_work_sync on PG ungate
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (platform/kernel/linux-starfive.git)
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS                                  0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT                                 0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                        0
#define mmTCP_CHAN_STEER_1_ARCT                                 0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                        0
#define mmTCP_CHAN_STEER_2_ARCT                                 0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                        0
#define mmTCP_CHAN_STEER_3_ARCT                                 0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                        0
#define mmTCP_CHAN_STEER_4_ARCT                                 0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                        0
#define mmTCP_CHAN_STEER_5_ARCT                                 0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                        0

enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                 \
	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
		#subblock,                                                     \
		TA_RAS_BLOCK__##subblock,                                      \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
	}

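/*
 * Each entry below packs eight flag arguments into the two bitfields of
 * struct ras_gfx_subblock: reading the macro above, the first four flags
 * (a..d) land in hw_supported_error_type as bits 0..3, while the last four
 * map into sw_supported_error_type in the non-obvious order g (bit 0),
 * e (bit 1), h (bit 2), f (bit 3). The per-bit semantics (which error
 * classes a sub-block supports in hardware vs. software) are not spelled
 * out here, so treat that reading as derived from the macro encoding alone.
 */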
static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

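/*
 * Write a GC register on behalf of the guest when direct MMIO access is not
 * permitted. As the code below shows, GRBM_GFX_CNTL/GRBM_GFX_INDEX are
 * mirrored into SCRATCH_REG2/SCRATCH_REG3 and then written directly, while
 * any other offset goes through a handshake with the RLC firmware, roughly:
 *
 *	writel(v, scratch_reg0);			// value to program
 *	writel(offset | 0x80000000, scratch_reg1);	// request + busy bit
 *	writel(1, spare_int);				// ring the RLC
 *	// poll scratch_reg1 until bit 31 clears, up to 50000 * 10us
 *
 * Reading bit 31 as a "request pending" flag that the firmware clears is an
 * interpretation of the polling loop; the bound works out to roughly a
 * 500ms timeout before the failure is logged.
 */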
void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
{
	static void *scratch_reg0;
	static void *scratch_reg1;
	static void *scratch_reg2;
	static void *scratch_reg3;
	static void *spare_int;
	static uint32_t grbm_cntl;
	static uint32_t grbm_idx;

	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;

	if (amdgpu_sriov_runtime(adev)) {
		pr_err("shouldn't call rlcg write register during runtime\n");
		return;
	}

	if (offset == grbm_cntl || offset == grbm_idx) {
		if (offset == grbm_cntl)
			writel(v, scratch_reg2);
		else if (offset == grbm_idx)
			writel(v, scratch_reg3);

		writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
	} else {
		uint32_t i = 0;
		uint32_t retries = 50000;

		writel(v, scratch_reg0);
		writel(offset | 0x80000000, scratch_reg1);
		writel(1, spare_int);
		for (i = 0; i < retries; i++) {
			u32 tmp;

			tmp = readl(scratch_reg1);
			if (!(tmp & 0x80000000))
				break;

			udelay(10);
		}
		if (i >= retries)
			pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset);
	}
}

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);

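/*
 * The helpers below build PM4 packets for the Kernel Interface Queue (KIQ),
 * the privileged compute ring used to ask the CP firmware to map, unmap and
 * query other queues and to invalidate TLBs. The dword count in each
 * PACKET3(..., n) must match the payload writes that follow; the *_size
 * fields in gfx_v9_0_kiq_pm4_funcs further down record the same totals
 * (header plus payload) so callers can reserve ring space up front.
 */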
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
		PACKET3_SET_RESOURCES_VMID_MASK(0) |
		/* vmid_mask:0, queue_type:0 (KIQ) */
		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

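/*
 * MAP_QUEUES payload, as emitted below: one control dword selecting
 * queue/pipe/ME and engine, the doorbell offset, then the 64-bit MQD
 * address and 64-bit wptr address split into lo/hi dwords. Note the ME
 * encoding (ring->me == 1 ? 0 : 1): MEC1/MEC2 map to microengine IDs 0/1
 * in the packet, and eng_sel is 4 only for a GFX ring.
 */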
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			 /* queue_type: normal compute queue */
			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			 /* alloc format: all_on_one_pipe */
			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			 /* num_queues: must be 1 */
			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

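/*
 * QUERY_STATUS asks the CP about the queue behind the given doorbell;
 * COMMAND(2) together with the lo/hi address and 64-bit sequence value
 * appears to request a fenced write-back to 'addr' once the status is
 * known, though the exact command semantics live in the CP packet spec
 * rather than in this file.
 */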
static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				uint16_t pasid, uint32_t flush_type,
				bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

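/*
 * "Golden" settings are (mask, value) register overrides validated for each
 * ASIC revision. The per-chip tables above are applied first, then the
 * gc_9_x common table -- except for Renoir, which returns early, and
 * Arcturus, which is explicitly skipped below.
 */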
931 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
932 {
933         switch (adev->asic_type) {
934         case CHIP_VEGA10:
935                 soc15_program_register_sequence(adev,
936                                                 golden_settings_gc_9_0,
937                                                 ARRAY_SIZE(golden_settings_gc_9_0));
938                 soc15_program_register_sequence(adev,
939                                                 golden_settings_gc_9_0_vg10,
940                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
941                 break;
942         case CHIP_VEGA12:
943                 soc15_program_register_sequence(adev,
944                                                 golden_settings_gc_9_2_1,
945                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
946                 soc15_program_register_sequence(adev,
947                                                 golden_settings_gc_9_2_1_vg12,
948                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
949                 break;
950         case CHIP_VEGA20:
951                 soc15_program_register_sequence(adev,
952                                                 golden_settings_gc_9_0,
953                                                 ARRAY_SIZE(golden_settings_gc_9_0));
954                 soc15_program_register_sequence(adev,
955                                                 golden_settings_gc_9_0_vg20,
956                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
957                 break;
958         case CHIP_ARCTURUS:
959                 soc15_program_register_sequence(adev,
960                                                 golden_settings_gc_9_4_1_arct,
961                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
962                 break;
963         case CHIP_RAVEN:
964                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
965                                                 ARRAY_SIZE(golden_settings_gc_9_1));
966                 if (adev->rev_id >= 8)
967                         soc15_program_register_sequence(adev,
968                                                         golden_settings_gc_9_1_rv2,
969                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
970                 else
971                         soc15_program_register_sequence(adev,
972                                                         golden_settings_gc_9_1_rv1,
973                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
974                 break;
975          case CHIP_RENOIR:
976                 soc15_program_register_sequence(adev,
977                                                 golden_settings_gc_9_1_rn,
978                                                 ARRAY_SIZE(golden_settings_gc_9_1_rn));
979                 return; /* for renoir, don't need common goldensetting */
980         default:
981                 break;
982         }
983
984         if (adev->asic_type != CHIP_ARCTURUS)
985                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
986                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
987 }
988
989 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
990 {
991         adev->gfx.scratch.num_reg = 8;
992         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
993         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
994 }
995
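/*
 * Emit a WRITE_DATA packet that writes @val to register @reg through the
 * given CP engine; @wc asks the CP to confirm the write before retiring
 * the packet.
 */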
996 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
997                                        bool wc, uint32_t reg, uint32_t val)
998 {
999         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1000         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1001                                 WRITE_DATA_DST_SEL(0) |
1002                                 (wc ? WR_CONFIRM : 0));
1003         amdgpu_ring_write(ring, reg);
1004         amdgpu_ring_write(ring, 0);
1005         amdgpu_ring_write(ring, val);
1006 }
1007
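/*
 * Emit a WAIT_REG_MEM packet: poll a register (@mem_space == 0) or a
 * dword in memory (@mem_space == 1) until (value & @mask) == @ref,
 * rechecking every @inv poll-interval units.
 */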
1008 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1009                                   int mem_space, int opt, uint32_t addr0,
1010                                   uint32_t addr1, uint32_t ref, uint32_t mask,
1011                                   uint32_t inv)
1012 {
1013         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1014         amdgpu_ring_write(ring,
1015                                  /* memory (1) or register (0) */
1016                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1017                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
1018                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1019                                  WAIT_REG_MEM_ENGINE(eng_sel)));
1020
1021         if (mem_space)
1022                 BUG_ON(addr0 & 0x3); /* Dword align */
1023         amdgpu_ring_write(ring, addr0);
1024         amdgpu_ring_write(ring, addr1);
1025         amdgpu_ring_write(ring, ref);
1026         amdgpu_ring_write(ring, mask);
1027         amdgpu_ring_write(ring, inv); /* poll interval */
1028 }
1029
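/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, ask the CP
 * to overwrite it with 0xDEADBEEF and poll until the write lands or
 * usec_timeout expires.
 */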
1030 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1031 {
1032         struct amdgpu_device *adev = ring->adev;
1033         uint32_t scratch;
1034         uint32_t tmp = 0;
1035         unsigned i;
1036         int r;
1037
1038         r = amdgpu_gfx_scratch_get(adev, &scratch);
1039         if (r)
1040                 return r;
1041
1042         WREG32(scratch, 0xCAFEDEAD);
1043         r = amdgpu_ring_alloc(ring, 3);
1044         if (r)
1045                 goto error_free_scratch;
1046
1047         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1048         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1049         amdgpu_ring_write(ring, 0xDEADBEEF);
1050         amdgpu_ring_commit(ring);
1051
1052         for (i = 0; i < adev->usec_timeout; i++) {
1053                 tmp = RREG32(scratch);
1054                 if (tmp == 0xDEADBEEF)
1055                         break;
1056                 udelay(1);
1057         }
1058
1059         if (i >= adev->usec_timeout)
1060                 r = -ETIMEDOUT;
1061
1062 error_free_scratch:
1063         amdgpu_gfx_scratch_free(adev, scratch);
1064         return r;
1065 }
1066
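/*
 * IB test: submit a small indirect buffer that writes 0xDEADBEEF to a
 * writeback slot, wait on the fence, then verify the value arrived.
 */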
1067 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1068 {
1069         struct amdgpu_device *adev = ring->adev;
1070         struct amdgpu_ib ib;
1071         struct dma_fence *f = NULL;
1072
1073         unsigned index;
1074         uint64_t gpu_addr;
1075         uint32_t tmp;
1076         long r;
1077
1078         r = amdgpu_device_wb_get(adev, &index);
1079         if (r)
1080                 return r;
1081
1082         gpu_addr = adev->wb.gpu_addr + (index * 4);
1083         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1084         memset(&ib, 0, sizeof(ib));
1085         r = amdgpu_ib_get(adev, NULL, 16, &ib);
1086         if (r)
1087                 goto err1;
1088
1089         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1090         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1091         ib.ptr[2] = lower_32_bits(gpu_addr);
1092         ib.ptr[3] = upper_32_bits(gpu_addr);
1093         ib.ptr[4] = 0xDEADBEEF;
1094         ib.length_dw = 5;
1095
1096         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1097         if (r)
1098                 goto err2;
1099
1100         r = dma_fence_wait_timeout(f, false, timeout);
1101         if (r == 0) {
1102                 r = -ETIMEDOUT;
1103                 goto err2;
1104         } else if (r < 0) {
1105                 goto err2;
1106         }
1107
1108         tmp = adev->wb.wb[index];
1109         if (tmp == 0xDEADBEEF)
1110                 r = 0;
1111         else
1112                 r = -EINVAL;
1113
1114 err2:
1115         amdgpu_ib_free(adev, &ib, NULL);
1116         dma_fence_put(f);
1117 err1:
1118         amdgpu_device_wb_free(adev, index);
1119         return r;
1120 }
1121
1122
1123 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1124 {
1125         release_firmware(adev->gfx.pfp_fw);
1126         adev->gfx.pfp_fw = NULL;
1127         release_firmware(adev->gfx.me_fw);
1128         adev->gfx.me_fw = NULL;
1129         release_firmware(adev->gfx.ce_fw);
1130         adev->gfx.ce_fw = NULL;
1131         release_firmware(adev->gfx.rlc_fw);
1132         adev->gfx.rlc_fw = NULL;
1133         release_firmware(adev->gfx.mec_fw);
1134         adev->gfx.mec_fw = NULL;
1135         release_firmware(adev->gfx.mec2_fw);
1136         adev->gfx.mec2_fw = NULL;
1137
1138         kfree(adev->gfx.rlc.register_list_format);
1139 }
1140
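/*
 * Parse the RLC v2.1 header extensions: ucode/feature versions, sizes
 * and payload offsets of the three save/restore lists (CNTL, GPM, SRM).
 */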
1141 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1142 {
1143         const struct rlc_firmware_header_v2_1 *rlc_hdr;
1144
1145         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1146         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1147         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1148         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1149         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1150         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1151         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1152         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1153         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1154         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1155         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1156         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1157         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1158         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1159                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1160 }
1161
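/*
 * Record whether the CP ME/MEC firmware is new enough for the ring code
 * to use WAIT_REG_MEM for combined register write-and-wait operations,
 * so it can fall back to separate write and wait packets otherwise.
 */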
1162 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1163 {
1164         adev->gfx.me_fw_write_wait = false;
1165         adev->gfx.mec_fw_write_wait = false;
1166
1167         if ((adev->asic_type != CHIP_ARCTURUS) &&
1168             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1169             (adev->gfx.mec_feature_version < 46) ||
1170             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1171             (adev->gfx.pfp_feature_version < 46)))
1172                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1173
1174         switch (adev->asic_type) {
1175         case CHIP_VEGA10:
1176                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1177                     (adev->gfx.me_feature_version >= 42) &&
1178                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1179                     (adev->gfx.pfp_feature_version >= 42))
1180                         adev->gfx.me_fw_write_wait = true;
1181
1182                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1183                     (adev->gfx.mec_feature_version >= 42))
1184                         adev->gfx.mec_fw_write_wait = true;
1185                 break;
1186         case CHIP_VEGA12:
1187                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1188                     (adev->gfx.me_feature_version >= 44) &&
1189                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1190                     (adev->gfx.pfp_feature_version >= 44))
1191                         adev->gfx.me_fw_write_wait = true;
1192
1193                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1194                     (adev->gfx.mec_feature_version >= 44))
1195                         adev->gfx.mec_fw_write_wait = true;
1196                 break;
1197         case CHIP_VEGA20:
1198                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1199                     (adev->gfx.me_feature_version >= 44) &&
1200                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1201                     (adev->gfx.pfp_feature_version >= 44))
1202                         adev->gfx.me_fw_write_wait = true;
1203
1204                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1205                     (adev->gfx.mec_feature_version >= 44))
1206                         adev->gfx.mec_fw_write_wait = true;
1207                 break;
1208         case CHIP_RAVEN:
1209                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1210                     (adev->gfx.me_feature_version >= 42) &&
1211                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1212                     (adev->gfx.pfp_feature_version >= 42))
1213                         adev->gfx.me_fw_write_wait = true;
1214
1215                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1216                     (adev->gfx.mec_feature_version >= 42))
1217                         adev->gfx.mec_fw_write_wait = true;
1218                 break;
1219         default:
1220                 adev->gfx.me_fw_write_wait = true;
1221                 adev->gfx.mec_fw_write_wait = true;
1222                 break;
1223         }
1224 }
1225
1226 struct amdgpu_gfxoff_quirk {
1227         u16 chip_vendor;
1228         u16 chip_device;
1229         u16 subsys_vendor;
1230         u16 subsys_device;
1231         u8 revision;
1232 };
1233
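/* Boards on which GFXOFF is known to be broken and must stay disabled */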
1234 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1235         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1236         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1237         /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1238         { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1239         { 0, 0, 0, 0, 0 },
1240 };
1241
1242 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1243 {
1244         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1245
1246         while (p && p->chip_device != 0) {
1247                 if (pdev->vendor == p->chip_vendor &&
1248                     pdev->device == p->chip_device &&
1249                     pdev->subsystem_vendor == p->subsys_vendor &&
1250                     pdev->subsystem_device == p->subsys_device &&
1251                     pdev->revision == p->revision) {
1252                         return true;
1253                 }
1254                 ++p;
1255         }
1256         return false;
1257 }
1258
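/*
 * "Kicker" Raven parts are identified by an SMU firmware version of
 * 0x41e2b or newer; the GFXOFF check below trusts their RLC firmware
 * regardless of its version number.
 */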
1259 static bool is_raven_kicker(struct amdgpu_device *adev)
1260 {
1261         return adev->pm.fw_version >= 0x41e2b;
1265 }
1266
1267 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1268 {
1269         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1270                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1271
1272         switch (adev->asic_type) {
1273         case CHIP_VEGA10:
1274         case CHIP_VEGA12:
1275         case CHIP_VEGA20:
1276                 break;
1277         case CHIP_RAVEN:
1278                 if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
1279                     ((!is_raven_kicker(adev) &&
1280                       adev->gfx.rlc_fw_version < 531) ||
1281                      (adev->gfx.rlc_feature_version < 1) ||
1282                      !adev->gfx.rlc.is_rlc_v2_1))
1283                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1284
1285                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1286                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1287                                 AMD_PG_SUPPORT_CP |
1288                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1289                 break;
1290         case CHIP_RENOIR:
1291                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1292                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1293                                 AMD_PG_SUPPORT_CP |
1294                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1295                 break;
1296         default:
1297                 break;
1298         }
1299 }
1300
1301 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1302                                           const char *chip_name)
1303 {
1304         char fw_name[30];
1305         int err;
1306         struct amdgpu_firmware_info *info = NULL;
1307         const struct common_firmware_header *header = NULL;
1308         const struct gfx_firmware_header_v1_0 *cp_hdr;
1309
1310         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1311         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1312         if (err)
1313                 goto out;
1314         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1315         if (err)
1316                 goto out;
1317         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1318         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1319         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1320
1321         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1322         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1323         if (err)
1324                 goto out;
1325         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1326         if (err)
1327                 goto out;
1328         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1329         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1330         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1331
1332         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1333         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1334         if (err)
1335                 goto out;
1336         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1337         if (err)
1338                 goto out;
1339         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1340         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1341         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1342
1343         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1344                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1345                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1346                 info->fw = adev->gfx.pfp_fw;
1347                 header = (const struct common_firmware_header *)info->fw->data;
1348                 adev->firmware.fw_size +=
1349                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1350
1351                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1352                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1353                 info->fw = adev->gfx.me_fw;
1354                 header = (const struct common_firmware_header *)info->fw->data;
1355                 adev->firmware.fw_size +=
1356                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1357
1358                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1359                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1360                 info->fw = adev->gfx.ce_fw;
1361                 header = (const struct common_firmware_header *)info->fw->data;
1362                 adev->firmware.fw_size +=
1363                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1364         }
1365
1366 out:
1367         if (err) {
1368                 dev_err(adev->dev,
1369                         "gfx9: Failed to load firmware \"%s\"\n",
1370                         fw_name);
1371                 release_firmware(adev->gfx.pfp_fw);
1372                 adev->gfx.pfp_fw = NULL;
1373                 release_firmware(adev->gfx.me_fw);
1374                 adev->gfx.me_fw = NULL;
1375                 release_firmware(adev->gfx.ce_fw);
1376                 adev->gfx.ce_fw = NULL;
1377         }
1378         return err;
1379 }
1380
1381 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1382                                           const char *chip_name)
1383 {
1384         char fw_name[30];
1385         int err;
1386         struct amdgpu_firmware_info *info = NULL;
1387         const struct common_firmware_header *header = NULL;
1388         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1389         unsigned int *tmp = NULL;
1390         unsigned int i = 0;
1391         uint16_t version_major;
1392         uint16_t version_minor;
1393         uint32_t smu_version;
1394
1395         /*
1396          * For Picasso on an AM4 socket board, we use picasso_rlc_am4.bin
1397          * instead of picasso_rlc.bin.
1398          * Detection method:
1399          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1400          *          or revision >= 0xD8 && revision <= 0xDF
1401          * otherwise it is PCO FP5
1402          */
1403         if (!strcmp(chip_name, "picasso") &&
1404                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1405                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1406                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1407         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1408                 (smu_version >= 0x41e2b))
1409                 /*
1410                  * SMC is loaded by SBIOS on APU, so the SMU version can be read directly.
1411                  */
1412                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1413         else
1414                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1415         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1416         if (err)
1417                 goto out;
1418         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
1419         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1420
1421         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1422         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1423         if (version_major == 2 && version_minor == 1)
1424                 adev->gfx.rlc.is_rlc_v2_1 = true;
1425
1426         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1427         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1428         adev->gfx.rlc.save_and_restore_offset =
1429                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1430         adev->gfx.rlc.clear_state_descriptor_offset =
1431                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1432         adev->gfx.rlc.avail_scratch_ram_locations =
1433                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1434         adev->gfx.rlc.reg_restore_list_size =
1435                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1436         adev->gfx.rlc.reg_list_format_start =
1437                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1438         adev->gfx.rlc.reg_list_format_separate_start =
1439                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1440         adev->gfx.rlc.starting_offsets_start =
1441                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1442         adev->gfx.rlc.reg_list_format_size_bytes =
1443                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1444         adev->gfx.rlc.reg_list_size_bytes =
1445                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1446         adev->gfx.rlc.register_list_format =
1447                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1448                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1449         if (!adev->gfx.rlc.register_list_format) {
1450                 err = -ENOMEM;
1451                 goto out;
1452         }
1453
1454         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1455                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1456         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1457                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1458
1459         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1460
1461         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1462                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1463         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1464                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1465
1466         if (adev->gfx.rlc.is_rlc_v2_1)
1467                 gfx_v9_0_init_rlc_ext_microcode(adev);
1468
1469         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1470                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1471                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1472                 info->fw = adev->gfx.rlc_fw;
1473                 header = (const struct common_firmware_header *)info->fw->data;
1474                 adev->firmware.fw_size +=
1475                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1476
1477                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1478                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1479                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1480                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1481                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1482                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1483                         info->fw = adev->gfx.rlc_fw;
1484                         adev->firmware.fw_size +=
1485                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1486
1487                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1488                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1489                         info->fw = adev->gfx.rlc_fw;
1490                         adev->firmware.fw_size +=
1491                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1492
1493                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1494                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1495                         info->fw = adev->gfx.rlc_fw;
1496                         adev->firmware.fw_size +=
1497                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1498                 }
1499         }
1500
1501 out:
1502         if (err) {
1503                 dev_err(adev->dev,
1504                         "gfx9: Failed to load firmware \"%s\"\n",
1505                         fw_name);
1506                 release_firmware(adev->gfx.rlc_fw);
1507                 adev->gfx.rlc_fw = NULL;
1508         }
1509         return err;
1510 }
1511
1512 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1513                                           const char *chip_name)
1514 {
1515         char fw_name[30];
1516         int err;
1517         struct amdgpu_firmware_info *info = NULL;
1518         const struct common_firmware_header *header = NULL;
1519         const struct gfx_firmware_header_v1_0 *cp_hdr;
1520
1521         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1522         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1523         if (err)
1524                 goto out;
1525         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1526         if (err)
1527                 goto out;
1528         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1529         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1530         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1531
1532
1533         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1534         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1535         if (!err) {
1536                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1537                 if (err)
1538                         goto out;
1539                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1540                         adev->gfx.mec2_fw->data;
1541                 adev->gfx.mec2_fw_version =
1542                         le32_to_cpu(cp_hdr->header.ucode_version);
1543                 adev->gfx.mec2_feature_version =
1544                         le32_to_cpu(cp_hdr->ucode_feature_version);
1545         } else {
1546                 err = 0;
1547                 adev->gfx.mec2_fw = NULL;
1548         }
1549
1550         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1551                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1552                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1553                 info->fw = adev->gfx.mec_fw;
1554                 header = (const struct common_firmware_header *)info->fw->data;
1555                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1556                 adev->firmware.fw_size +=
1557                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1558
1559                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1560                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1561                 info->fw = adev->gfx.mec_fw;
1562                 adev->firmware.fw_size +=
1563                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1564
1565                 if (adev->gfx.mec2_fw) {
1566                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1567                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1568                         info->fw = adev->gfx.mec2_fw;
1569                         header = (const struct common_firmware_header *)info->fw->data;
1570                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1571                         adev->firmware.fw_size +=
1572                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1573
1574                         /* TODO: Determine if MEC2 JT FW loading can be
1575                          * removed for all GFX V9 ASICs and above */
1576                         if (adev->asic_type != CHIP_ARCTURUS &&
1577                             adev->asic_type != CHIP_RENOIR) {
1578                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1579                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1580                                 info->fw = adev->gfx.mec2_fw;
1581                                 adev->firmware.fw_size +=
1582                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1583                                         PAGE_SIZE);
1584                         }
1585                 }
1586         }
1587
1588 out:
1589         gfx_v9_0_check_if_need_gfxoff(adev);
1590         gfx_v9_0_check_fw_write_wait(adev);
1591         if (err) {
1592                 dev_err(adev->dev,
1593                         "gfx9: Failed to load firmware \"%s\"\n",
1594                         fw_name);
1595                 release_firmware(adev->gfx.mec_fw);
1596                 adev->gfx.mec_fw = NULL;
1597                 release_firmware(adev->gfx.mec2_fw);
1598                 adev->gfx.mec2_fw = NULL;
1599         }
1600         return err;
1601 }
1602
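/* Resolve the ucode name prefix for this ASIC and fetch all gfx blobs. */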
1603 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1604 {
1605         const char *chip_name;
1606         int r;
1607
1608         DRM_DEBUG("\n");
1609
1610         switch (adev->asic_type) {
1611         case CHIP_VEGA10:
1612                 chip_name = "vega10";
1613                 break;
1614         case CHIP_VEGA12:
1615                 chip_name = "vega12";
1616                 break;
1617         case CHIP_VEGA20:
1618                 chip_name = "vega20";
1619                 break;
1620         case CHIP_RAVEN:
1621                 if (adev->rev_id >= 8)
1622                         chip_name = "raven2";
1623                 else if (adev->pdev->device == 0x15d8)
1624                         chip_name = "picasso";
1625                 else
1626                         chip_name = "raven";
1627                 break;
1628         case CHIP_ARCTURUS:
1629                 chip_name = "arcturus";
1630                 break;
1631         case CHIP_RENOIR:
1632                 chip_name = "renoir";
1633                 break;
1634         default:
1635                 BUG();
1636         }
1637
1638         /* No CPG in Arcturus */
1639         if (adev->asic_type != CHIP_ARCTURUS) {
1640                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1641                 if (r)
1642                         return r;
1643         }
1644
1645         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1646         if (r)
1647                 return r;
1648
1649         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1650         if (r)
1651                 return r;
1652
1653         return r;
1654 }
1655
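/*
 * Size of the clear-state buffer in dwords: preamble begin (2) + context
 * control (3) + one SET_CONTEXT_REG run per extent + preamble end (2) +
 * CLEAR_STATE (2).
 */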
1656 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1657 {
1658         u32 count = 0;
1659         const struct cs_section_def *sect = NULL;
1660         const struct cs_extent_def *ext = NULL;
1661
1662         /* begin clear state */
1663         count += 2;
1664         /* context control state */
1665         count += 3;
1666
1667         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1668                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1669                         if (sect->id == SECT_CONTEXT)
1670                                 count += 2 + ext->reg_count;
1671                         else
1672                                 return 0;
1673                 }
1674         }
1675
1676         /* end clear state */
1677         count += 2;
1678         /* clear state */
1679         count += 2;
1680
1681         return count;
1682 }
1683
1684 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1685                                     volatile u32 *buffer)
1686 {
1687         u32 count = 0, i;
1688         const struct cs_section_def *sect = NULL;
1689         const struct cs_extent_def *ext = NULL;
1690
1691         if (adev->gfx.rlc.cs_data == NULL)
1692                 return;
1693         if (buffer == NULL)
1694                 return;
1695
1696         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1697         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1698
1699         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1700         buffer[count++] = cpu_to_le32(0x80000000);
1701         buffer[count++] = cpu_to_le32(0x80000000);
1702
1703         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1704                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1705                         if (sect->id == SECT_CONTEXT) {
1706                                 buffer[count++] =
1707                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1708                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1709                                                 PACKET3_SET_CONTEXT_REG_START);
1710                                 for (i = 0; i < ext->reg_count; i++)
1711                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1712                         } else {
1713                                 return;
1714                         }
1715                 }
1716         }
1717
1718         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1719         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1720
1721         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1722         buffer[count++] = cpu_to_le32(0);
1723 }
1724
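/*
 * Build the always-on CU bitmap for each SE/SH: the first
 * always_on_cu_num usable CUs stay powered, and the first
 * pg_always_on_cu_num of them are also latched into
 * RLC_PG_ALWAYS_ON_CU_MASK.
 */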
1725 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1726 {
1727         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1728         uint32_t pg_always_on_cu_num = 2;
1729         uint32_t always_on_cu_num;
1730         uint32_t i, j, k;
1731         uint32_t mask, cu_bitmap, counter;
1732
1733         if (adev->flags & AMD_IS_APU)
1734                 always_on_cu_num = 4;
1735         else if (adev->asic_type == CHIP_VEGA12)
1736                 always_on_cu_num = 8;
1737         else
1738                 always_on_cu_num = 12;
1739
1740         mutex_lock(&adev->grbm_idx_mutex);
1741         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1742                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1743                         mask = 1;
1744                         cu_bitmap = 0;
1745                         counter = 0;
1746                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1747
1748                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1749                                 if (cu_info->bitmap[i][j] & mask) {
1750                                         if (counter == pg_always_on_cu_num)
1751                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1752                                         if (counter < always_on_cu_num)
1753                                                 cu_bitmap |= mask;
1754                                         else
1755                                                 break;
1756                                         counter++;
1757                                 }
1758                                 mask <<= 1;
1759                         }
1760
1761                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1762                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1763                 }
1764         }
1765         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1766         mutex_unlock(&adev->grbm_idx_mutex);
1767 }
1768
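/*
 * Program the RLC load-balancing (LBPW) thresholds and sampling
 * parameters with Raven values; gfx_v9_4_init_lbpw() below does the
 * same for Vega20.
 */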
1769 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1770 {
1771         uint32_t data;
1772
1773         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1774         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1775         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1776         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1777         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1778
1779         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1780         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1781
1782         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1783         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1784
1785         mutex_lock(&adev->grbm_idx_mutex);
1786         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1787         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1788         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1789
1790         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1791         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1792         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1793         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1794         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1795
1796         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1797         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1798         data &= 0x0000FFFF;
1799         data |= 0x00C00000;
1800         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1801
1802         /*
1803          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1804          * programmed in gfx_v9_0_init_always_on_cu_mask()
1805          */
1806
1807         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1808          * but is used here as part of the RLC_LB_CNTL configuration */
1809         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1810         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1811         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1812         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1813         mutex_unlock(&adev->grbm_idx_mutex);
1814
1815         gfx_v9_0_init_always_on_cu_mask(adev);
1816 }
1817
1818 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1819 {
1820         uint32_t data;
1821
1822         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1823         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1824         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1825         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1826         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1827
1828         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1829         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1830
1831         /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1832         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1833
1834         mutex_lock(&adev->grbm_idx_mutex);
1835         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1836         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1837         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1838
1839         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1840         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1841         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1842         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1843         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1844
1845         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1846         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1847         data &= 0x0000FFFF;
1848         data |= 0x00C00000;
1849         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1850
1851         /*
1852          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1853          * programmed in gfx_v9_0_init_always_on_cu_mask()
1854          */
1855
1856         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1857          * but is used here as part of the RLC_LB_CNTL configuration */
1858         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1859         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1860         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1861         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1862         mutex_unlock(&adev->grbm_idx_mutex);
1863
1864         gfx_v9_0_init_always_on_cu_mask(adev);
1865 }
1866
1867 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1868 {
1869         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1870 }
1871
1872 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1873 {
1874         return 5;
1875 }
1876
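/*
 * RLC software init: clear-state buffer, the Raven/Renoir cp table
 * (jump tables + GDS backup), per-ASIC LBPW setup and the initial SPM
 * VMID.
 */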
1877 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1878 {
1879         const struct cs_section_def *cs_data;
1880         int r;
1881
1882         adev->gfx.rlc.cs_data = gfx9_cs_data;
1883
1884         cs_data = adev->gfx.rlc.cs_data;
1885
1886         if (cs_data) {
1887                 /* init clear state block */
1888                 r = amdgpu_gfx_rlc_init_csb(adev);
1889                 if (r)
1890                         return r;
1891         }
1892
1893         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1894                 /* TODO: double check the cp_table_size for RV */
1895                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1896                 r = amdgpu_gfx_rlc_init_cpt(adev);
1897                 if (r)
1898                         return r;
1899         }
1900
1901         switch (adev->asic_type) {
1902         case CHIP_RAVEN:
1903                 gfx_v9_0_init_lbpw(adev);
1904                 break;
1905         case CHIP_VEGA20:
1906                 gfx_v9_4_init_lbpw(adev);
1907                 break;
1908         default:
1909                 break;
1910         }
1911
1912         /* init spm vmid with 0xf */
1913         if (adev->gfx.rlc.funcs->update_spm_vmid)
1914                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1915
1916         return 0;
1917 }
1918
1919 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1920 {
1921         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1922         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1923 }
1924
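/*
 * MEC init: carve one HPD EOP buffer in VRAM covering every acquired
 * compute ring and stage a copy of the MEC ucode in a GTT bo.
 */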
1925 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1926 {
1927         int r;
1928         u32 *hpd;
1929         const __le32 *fw_data;
1930         unsigned fw_size;
1931         u32 *fw;
1932         size_t mec_hpd_size;
1933
1934         const struct gfx_firmware_header_v1_0 *mec_hdr;
1935
1936         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1937
1938         /* take ownership of the relevant compute queues */
1939         amdgpu_gfx_compute_queue_acquire(adev);
1940         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1941
1942         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1943                                       AMDGPU_GEM_DOMAIN_VRAM,
1944                                       &adev->gfx.mec.hpd_eop_obj,
1945                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1946                                       (void **)&hpd);
1947         if (r) {
1948                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1949                 gfx_v9_0_mec_fini(adev);
1950                 return r;
1951         }
1952
1953         memset(hpd, 0, mec_hpd_size);
1954
1955         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1956         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1957
1958         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1959
1960         fw_data = (const __le32 *)
1961                 (adev->gfx.mec_fw->data +
1962                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1963         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1964
1965         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1966                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1967                                       &adev->gfx.mec.mec_fw_obj,
1968                                       &adev->gfx.mec.mec_fw_gpu_addr,
1969                                       (void **)&fw);
1970         if (r) {
1971                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1972                 gfx_v9_0_mec_fini(adev);
1973                 return r;
1974         }
1975
1976         memcpy(fw, fw_data, fw_size);
1977
1978         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1979         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1980
1981         return 0;
1982 }
1983
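/*
 * Wave debug helpers: read SQ per-wave state through the indirect
 * SQ_IND_INDEX / SQ_IND_DATA register pair.
 */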
1984 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1985 {
1986         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1987                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1988                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1989                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1990                 (SQ_IND_INDEX__FORCE_READ_MASK));
1991         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1992 }
1993
1994 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1995                            uint32_t wave, uint32_t thread,
1996                            uint32_t regno, uint32_t num, uint32_t *out)
1997 {
1998         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1999                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2000                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2001                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
2002                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2003                 (SQ_IND_INDEX__FORCE_READ_MASK) |
2004                 (SQ_IND_INDEX__AUTO_INCR_MASK));
2005         while (num--)
2006                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2007 }
2008
2009 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2010 {
2011         /* type 1 wave data */
2012         dst[(*no_fields)++] = 1;
2013         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2014         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2015         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2016         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2017         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2018         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2019         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2020         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2021         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2022         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2023         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2024         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2025         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2026         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2027 }
2028
2029 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2030                                      uint32_t wave, uint32_t start,
2031                                      uint32_t size, uint32_t *dst)
2032 {
2033         wave_read_regs(
2034                 adev, simd, wave, 0,
2035                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2036 }
2037
2038 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2039                                      uint32_t wave, uint32_t thread,
2040                                      uint32_t start, uint32_t size,
2041                                      uint32_t *dst)
2042 {
2043         wave_read_regs(
2044                 adev, simd, wave, thread,
2045                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2046 }
2047
2048 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2049                                   u32 me, u32 pipe, u32 q, u32 vm)
2050 {
2051         soc15_grbm_select(adev, me, pipe, q, vm);
2052 }
2053
2054 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2055         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2056         .select_se_sh = &gfx_v9_0_select_se_sh,
2057         .read_wave_data = &gfx_v9_0_read_wave_data,
2058         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2059         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2060         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2061         .ras_error_inject = &gfx_v9_0_ras_error_inject,
2062         .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2063         .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2064 };
2065
2066 static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
2067         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2068         .select_se_sh = &gfx_v9_0_select_se_sh,
2069         .read_wave_data = &gfx_v9_0_read_wave_data,
2070         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2071         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2072         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2073         .ras_error_inject = &gfx_v9_4_ras_error_inject,
2074         .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
2075         .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
2076 };
2077
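/*
 * Early gfx config: choose the gfx function table, FIFO sizes and
 * GB_ADDR_CONFIG per ASIC, then decode the GB_ADDR_CONFIG bitfields into
 * gb_addr_config_fields.
 */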
2078 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2079 {
2080         u32 gb_addr_config;
2081         int err;
2082
2083         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2084
2085         switch (adev->asic_type) {
2086         case CHIP_VEGA10:
2087                 adev->gfx.config.max_hw_contexts = 8;
2088                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2089                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2090                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2091                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2092                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2093                 break;
2094         case CHIP_VEGA12:
2095                 adev->gfx.config.max_hw_contexts = 8;
2096                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2097                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2098                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2099                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2100                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2101                 DRM_INFO("fix gfx.config for vega12\n");
2102                 break;
2103         case CHIP_VEGA20:
2104                 adev->gfx.config.max_hw_contexts = 8;
2105                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2106                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2107                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2108                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2109                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2110                 gb_addr_config &= ~0xf3e777ff;
2111                 gb_addr_config |= 0x22014042;
2112                 /* check vbios table if gpu info is not available */
2113                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2114                 if (err)
2115                         return err;
2116                 break;
2117         case CHIP_RAVEN:
2118                 adev->gfx.config.max_hw_contexts = 8;
2119                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2120                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2121                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2122                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2123                 if (adev->rev_id >= 8)
2124                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2125                 else
2126                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2127                 break;
2128         case CHIP_ARCTURUS:
2129                 adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
2130                 adev->gfx.config.max_hw_contexts = 8;
2131                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2132                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2133                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2134                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2135                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2136                 gb_addr_config &= ~0xf3e777ff;
2137                 gb_addr_config |= 0x22014042;
2138                 break;
2139         case CHIP_RENOIR:
2140                 adev->gfx.config.max_hw_contexts = 8;
2141                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2142                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2143                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2144                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2145                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2146                 gb_addr_config &= ~0xf3e777ff;
2147                 gb_addr_config |= 0x22010042;
2148                 break;
2149         default:
2150                 BUG();
2151                 break;
2152         }
2153
2154         adev->gfx.config.gb_addr_config = gb_addr_config;
2155
2156         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2157                         REG_GET_FIELD(
2158                                         adev->gfx.config.gb_addr_config,
2159                                         GB_ADDR_CONFIG,
2160                                         NUM_PIPES);
2161
2162         adev->gfx.config.max_tile_pipes =
2163                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2164
2165         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2166                         REG_GET_FIELD(
2167                                         adev->gfx.config.gb_addr_config,
2168                                         GB_ADDR_CONFIG,
2169                                         NUM_BANKS);
2170         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2171                         REG_GET_FIELD(
2172                                         adev->gfx.config.gb_addr_config,
2173                                         GB_ADDR_CONFIG,
2174                                         MAX_COMPRESSED_FRAGS);
2175         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2176                         REG_GET_FIELD(
2177                                         adev->gfx.config.gb_addr_config,
2178                                         GB_ADDR_CONFIG,
2179                                         NUM_RB_PER_SE);
2180         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2181                         REG_GET_FIELD(
2182                                         adev->gfx.config.gb_addr_config,
2183                                         GB_ADDR_CONFIG,
2184                                         NUM_SHADER_ENGINES);
2185         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2186                         REG_GET_FIELD(
2187                                         adev->gfx.config.gb_addr_config,
2188                                         GB_ADDR_CONFIG,
2189                                         PIPE_INTERLEAVE_SIZE));
2190
2191         return 0;
2192 }
2193
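/*
 * Set up one compute ring: map (mec, pipe, queue) to a doorbell and an
 * HPD EOP slice, then attach it to the matching MEC pipe EOP interrupt.
 */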
2194 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2195                                       int mec, int pipe, int queue)
2196 {
2197         int r;
2198         unsigned irq_type;
2199         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2200
2203         /* mec0 is me1 */
2204         ring->me = mec + 1;
2205         ring->pipe = pipe;
2206         ring->queue = queue;
2207
2208         ring->ring_obj = NULL;
2209         ring->use_doorbell = true;
2210         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2211         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2212                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2213         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2214
2215         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2216                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2217                 + ring->pipe;
2218
2219         /* type-2 packets are deprecated on MEC, use type-3 instead */
2220         r = amdgpu_ring_init(adev, ring, 1024,
2221                              &adev->gfx.eop_irq, irq_type);
2222         if (r)
2223                 return r;
2224
2225
2226         return 0;
2227 }
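/*
 * Worked example (hypothetical inputs): with mec = 1, pipe = 2, queue = 3
 * and num_pipe_per_mec = 4, the code above yields ring->me = 2, the name
 * "comp_2.2.3", and irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP +
 * (1 * 4) + 2, i.e. the EOP interrupt of MEC2 pipe 2.
 */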
2228
2229 static int gfx_v9_0_sw_init(void *handle)
2230 {
2231         int i, j, k, r, ring_id;
2232         struct amdgpu_ring *ring;
2233         struct amdgpu_kiq *kiq;
2234         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2235
2236         switch (adev->asic_type) {
2237         case CHIP_VEGA10:
2238         case CHIP_VEGA12:
2239         case CHIP_VEGA20:
2240         case CHIP_RAVEN:
2241         case CHIP_ARCTURUS:
2242         case CHIP_RENOIR:
2243                 adev->gfx.mec.num_mec = 2;
2244                 break;
2245         default:
2246                 adev->gfx.mec.num_mec = 1;
2247                 break;
2248         }
2249
2250         adev->gfx.mec.num_pipe_per_mec = 4;
2251         adev->gfx.mec.num_queue_per_pipe = 8;
2252
2253         /* EOP Event */
2254         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2255         if (r)
2256                 return r;
2257
2258         /* Privileged reg */
2259         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2260                               &adev->gfx.priv_reg_irq);
2261         if (r)
2262                 return r;
2263
2264         /* Privileged inst */
2265         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2266                               &adev->gfx.priv_inst_irq);
2267         if (r)
2268                 return r;
2269
2270         /* ECC error */
2271         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2272                               &adev->gfx.cp_ecc_error_irq);
2273         if (r)
2274                 return r;
2275
2276         /* FUE error */
2277         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2278                               &adev->gfx.cp_ecc_error_irq);
2279         if (r)
2280                 return r;
2281
2282         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2283
2284         gfx_v9_0_scratch_init(adev);
2285
2286         r = gfx_v9_0_init_microcode(adev);
2287         if (r) {
2288                 DRM_ERROR("Failed to load gfx firmware!\n");
2289                 return r;
2290         }
2291
2292         r = adev->gfx.rlc.funcs->init(adev);
2293         if (r) {
2294                 DRM_ERROR("Failed to init rlc BOs!\n");
2295                 return r;
2296         }
2297
2298         r = gfx_v9_0_mec_init(adev);
2299         if (r) {
2300                 DRM_ERROR("Failed to init MEC BOs!\n");
2301                 return r;
2302         }
2303
2304         /* set up the gfx ring */
2305         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2306                 ring = &adev->gfx.gfx_ring[i];
2307                 ring->ring_obj = NULL;
2308                 if (!i)
2309                         sprintf(ring->name, "gfx");
2310                 else
2311                         sprintf(ring->name, "gfx_%d", i);
2312                 ring->use_doorbell = true;
2313                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2314                 r = amdgpu_ring_init(adev, ring, 1024,
2315                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2316                 if (r)
2317                         return r;
2318         }
2319
2320         /* set up the compute queues - allocate horizontally across pipes */
2321         ring_id = 0;
2322         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2323                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2324                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2325                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2326                                         continue;
2327
2328                                 r = gfx_v9_0_compute_ring_init(adev,
2329                                                                ring_id,
2330                                                                i, k, j);
2331                                 if (r)
2332                                         return r;
2333
2334                                 ring_id++;
2335                         }
2336                 }
2337         }
2338
2339         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2340         if (r) {
2341                 DRM_ERROR("Failed to init KIQ BOs!\n");
2342                 return r;
2343         }
2344
2345         kiq = &adev->gfx.kiq;
2346         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2347         if (r)
2348                 return r;
2349
2350         /* create MQD for all compute queues as well as KIQ for the SRIOV case */
2351         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2352         if (r)
2353                 return r;
2354
2355         adev->gfx.ce_ram_size = 0x8000;
2356
2357         r = gfx_v9_0_gpu_early_init(adev);
2358         if (r)
2359                 return r;
2360
2361         return 0;
2362 }
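/*
 * Note on the queue walk in gfx_v9_0_sw_init() above: the loops iterate
 * mec -> queue -> pipe, so consecutive ring_ids land on different pipes
 * ("horizontal" allocation). E.g. with 4 pipes, ring 0 is (mec 0, pipe 0,
 * queue 0), ring 1 is (mec 0, pipe 1, queue 0), and so on, assuming every
 * queue passes amdgpu_gfx_is_mec_queue_enabled().
 */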
2363
2364
2365 static int gfx_v9_0_sw_fini(void *handle)
2366 {
2367         int i;
2368         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2369
2370         amdgpu_gfx_ras_fini(adev);
2371
2372         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2373                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2374         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2375                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2376
2377         amdgpu_gfx_mqd_sw_fini(adev);
2378         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2379         amdgpu_gfx_kiq_fini(adev);
2380
2381         gfx_v9_0_mec_fini(adev);
2382         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2383         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2384                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2385                                 &adev->gfx.rlc.cp_table_gpu_addr,
2386                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2387         }
2388         gfx_v9_0_free_microcode(adev);
2389
2390         return 0;
2391 }
2392
2393
2394 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2395 {
2396         /* TODO */
2397 }
2398
2399 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2400 {
2401         u32 data;
2402
2403         if (instance == 0xffffffff)
2404                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2405         else
2406                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2407
2408         if (se_num == 0xffffffff)
2409                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2410         else
2411                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2412
2413         if (sh_num == 0xffffffff)
2414                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2415         else
2416                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2417
2418         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2419 }
2420
2421 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2422 {
2423         u32 data, mask;
2424
2425         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2426         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2427
2428         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2429         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2430
2431         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2432                                          adev->gfx.config.max_sh_per_se);
2433
2434         return (~data) & mask;
2435 }
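/*
 * Sketch of the math above, with assumed config values: if
 * max_backends_per_se = 4 and max_sh_per_se = 1, the created bitmask is
 * 0xf; the DISABLE registers carry set bits for disabled backends, so
 * (~data) & mask flips that into a bitmap of *active* RBs.
 */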
2436
2437 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2438 {
2439         int i, j;
2440         u32 data;
2441         u32 active_rbs = 0;
2442         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2443                                         adev->gfx.config.max_sh_per_se;
2444
2445         mutex_lock(&adev->grbm_idx_mutex);
2446         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2447                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2448                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2449                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2450                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2451                                                rb_bitmap_width_per_sh);
2452                 }
2453         }
2454         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2455         mutex_unlock(&adev->grbm_idx_mutex);
2456
2457         adev->gfx.config.backend_enable_mask = active_rbs;
2458         adev->gfx.config.num_rbs = hweight32(active_rbs);
2459 }
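/*
 * Continuing the example (assumed values): with 4 shader engines, 1 SH
 * per SE and 4 RBs per SE all active, each iteration contributes 0xf
 * shifted left by 4 * se, giving active_rbs = 0xffff and
 * num_rbs = hweight32(0xffff) = 16.
 */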
2460
2461 #define DEFAULT_SH_MEM_BASES    (0x6000)
2462 #define FIRST_COMPUTE_VMID      (8)
2463 #define LAST_COMPUTE_VMID       (16)
2464 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2465 {
2466         int i;
2467         uint32_t sh_mem_config;
2468         uint32_t sh_mem_bases;
2469
2470         /*
2471          * Configure apertures:
2472          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2473          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2474          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2475          */
2476         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2477
2478         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2479                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2480                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2481
2482         mutex_lock(&adev->srbm_mutex);
2483         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2484                 soc15_grbm_select(adev, 0, 0, 0, i);
2485                 /* CP and shaders */
2486                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2487                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2488         }
2489         soc15_grbm_select(adev, 0, 0, 0, 0);
2490         mutex_unlock(&adev->srbm_mutex);
2491
2492         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2493            access. These should be enabled by FW for target VMIDs. */
2494         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2495                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2496                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2497                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2498                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2499         }
2500 }
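/*
 * Aperture arithmetic, for illustration: DEFAULT_SH_MEM_BASES = 0x6000,
 * so sh_mem_bases = 0x6000 | (0x6000 << 16) = 0x60006000. SH_MEM_BASES
 * holds 16-bit bases for address bits 63:48 (compare the >> 48 shifts in
 * gfx_v9_0_constants_init() below), and 0x6000 << 48 =
 * 0x60000000'00000000; exactly the LDS aperture start quoted above.
 */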
2501
2502 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2503 {
2504         int vmid;
2505
2506         /*
2507          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2508          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2509          * the driver can enable them for graphics. VMID0 should maintain
2510          * access so that HWS firmware can save/restore entries.
2511          */
2512         for (vmid = 1; vmid < 16; vmid++) {
2513                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2514                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2515                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2516                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2517         }
2518 }
2519
2520 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2521 {
2522         uint32_t tmp;
2523
2524         switch (adev->asic_type) {
2525         case CHIP_ARCTURUS:
2526                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2527                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2528                                         DISABLE_BARRIER_WAITCNT, 1);
2529                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2530                 break;
2531         default:
2532                 break;
2533         }
2534 }
2535
2536 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2537 {
2538         u32 tmp;
2539         int i;
2540
2541         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2542
2543         gfx_v9_0_tiling_mode_table_init(adev);
2544
2545         gfx_v9_0_setup_rb(adev);
2546         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2547         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2548
2549         /* XXX SH_MEM regs */
2550         /* where to put LDS, scratch, GPUVM in FSA64 space */
2551         mutex_lock(&adev->srbm_mutex);
2552         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2553                 soc15_grbm_select(adev, 0, 0, 0, i);
2554                 /* CP and shaders */
2555                 if (i == 0) {
2556                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2557                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2558                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2559                                             !!amdgpu_noretry);
2560                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2561                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2562                 } else {
2563                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2564                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2565                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2566                                             !!amdgpu_noretry);
2567                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2568                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2569                                 (adev->gmc.private_aperture_start >> 48));
2570                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2571                                 (adev->gmc.shared_aperture_start >> 48));
2572                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2573                 }
2574         }
2575         soc15_grbm_select(adev, 0, 0, 0, 0);
2576
2577         mutex_unlock(&adev->srbm_mutex);
2578
2579         gfx_v9_0_init_compute_vmid(adev);
2580         gfx_v9_0_init_gds_vmid(adev);
2581         gfx_v9_0_init_sq_config(adev);
2582 }
2583
2584 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2585 {
2586         u32 i, j, k;
2587         u32 mask;
2588
2589         mutex_lock(&adev->grbm_idx_mutex);
2590         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2591                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2592                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2593                         for (k = 0; k < adev->usec_timeout; k++) {
2594                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2595                                         break;
2596                                 udelay(1);
2597                         }
2598                         if (k == adev->usec_timeout) {
2599                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2600                                                       0xffffffff, 0xffffffff);
2601                                 mutex_unlock(&adev->grbm_idx_mutex);
2602                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2603                                          i, j);
2604                                 return;
2605                         }
2606                 }
2607         }
2608         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2609         mutex_unlock(&adev->grbm_idx_mutex);
2610
2611         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2612                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2613                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2614                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2615         for (k = 0; k < adev->usec_timeout; k++) {
2616                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2617                         break;
2618                 udelay(1);
2619         }
2620 }
2621
2622 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2623                                                bool enable)
2624 {
2625         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2626
2627         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2628         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2629         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2630         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2631
2632         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2633 }
2634
2635 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2636 {
2637         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2638         /* csib */
2639         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2640                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2641         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2642                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2643         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2644                         adev->gfx.rlc.clear_state_size);
2645 }
2646
2647 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2648                                 int indirect_offset,
2649                                 int list_size,
2650                                 int *unique_indirect_regs,
2651                                 int unique_indirect_reg_count,
2652                                 int *indirect_start_offsets,
2653                                 int *indirect_start_offsets_count,
2654                                 int max_start_offsets_count)
2655 {
2656         int idx;
2657
2658         for (; indirect_offset < list_size; indirect_offset++) {
2659                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2660                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2661                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2662
2663                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2664                         indirect_offset += 2;
2665
2666                         /* look for the matching index */
2667                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2668                                 if (unique_indirect_regs[idx] ==
2669                                         register_list_format[indirect_offset] ||
2670                                         !unique_indirect_regs[idx])
2671                                         break;
2672                         }
2673
2674                         BUG_ON(idx >= unique_indirect_reg_count);
2675
2676                         if (!unique_indirect_regs[idx])
2677                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2678
2679                         indirect_offset++;
2680                 }
2681         }
2682 }
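/*
 * Format implied by the parser above (an inference, not documented here):
 * each run of indirect entries is a sequence of 3-dword records whose
 * third dword is the indirect register offset, terminated by 0xFFFFFFFF;
 * the offset of every run start is recorded in indirect_start_offsets[].
 */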
2683
2684 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2685 {
2686         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2687         int unique_indirect_reg_count = 0;
2688
2689         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2690         int indirect_start_offsets_count = 0;
2691
2692         int list_size = 0;
2693         int i = 0, j = 0;
2694         u32 tmp = 0;
2695
2696         u32 *register_list_format =
2697                 kmemdup(adev->gfx.rlc.register_list_format,
2698                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2699         if (!register_list_format)
2700                 return -ENOMEM;
2701
2702         /* setup unique_indirect_regs array and indirect_start_offsets array */
2703         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2704         gfx_v9_1_parse_ind_reg_list(register_list_format,
2705                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2706                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2707                                     unique_indirect_regs,
2708                                     unique_indirect_reg_count,
2709                                     indirect_start_offsets,
2710                                     &indirect_start_offsets_count,
2711                                     ARRAY_SIZE(indirect_start_offsets));
2712
2713         /* enable auto inc in case it is disabled */
2714         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2715         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2716         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2717
2718         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2719         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2720                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2721         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2722                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2723                         adev->gfx.rlc.register_restore[i]);
2724
2725         /* load indirect register */
2726         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2727                 adev->gfx.rlc.reg_list_format_start);
2728
2729         /* direct register portion */
2730         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2731                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2732                         register_list_format[i]);
2733
2734         /* indirect register portion */
2735         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2736                 if (register_list_format[i] == 0xFFFFFFFF) {
2737                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2738                         continue;
2739                 }
2740
2741                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2742                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2743
2744                 for (j = 0; j < unique_indirect_reg_count; j++) {
2745                         if (register_list_format[i] == unique_indirect_regs[j]) {
2746                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2747                                 break;
2748                         }
2749                 }
2750
2751                 BUG_ON(j >= unique_indirect_reg_count);
2752
2753                 i++;
2754         }
2755
2756         /* set save/restore list size */
2757         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2758         list_size = list_size >> 1;
2759         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2760                 adev->gfx.rlc.reg_restore_list_size);
2761         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2762
2763         /* write the starting offsets to RLC scratch ram */
2764         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2765                 adev->gfx.rlc.starting_offsets_start);
2766         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2767                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2768                        indirect_start_offsets[i]);
2769
2770         /* load unique indirect regs */
2771         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2772                 if (unique_indirect_regs[i] != 0) {
2773                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2774                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2775                                unique_indirect_regs[i] & 0x3FFFF);
2776
2777                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2778                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2779                                unique_indirect_regs[i] >> 20);
2780                 }
2781         }
2782
2783         kfree(register_list_format);
2784         return 0;
2785 }
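/*
 * On the list_size computation above: reg_list_size_bytes >> 2 converts
 * bytes to dwords, and the further >> 1 halves that, presumably because
 * the restore list stores (register, value) pairs and the RLC wants the
 * count of pairs. This reading is an assumption, not stated in the code.
 */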
2786
2787 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2788 {
2789         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2790 }
2791
2792 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2793                                              bool enable)
2794 {
2795         uint32_t data = 0;
2796         uint32_t default_data = 0;
2797
2798         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2799         if (enable) {
2800                 /* enable GFXIP control over CGPG */
2801                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2802                 if (default_data != data)
2803                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2804
2805                 /* update status */
2806                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2807                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2808                 if (default_data != data)
2809                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2810         } else {
2811                 /* restore GFXIP control over CGPG */
2812                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2813                 if (default_data != data)
2814                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2815         }
2816 }
2817
2818 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2819 {
2820         uint32_t data = 0;
2821
2822         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2823                               AMD_PG_SUPPORT_GFX_SMG |
2824                               AMD_PG_SUPPORT_GFX_DMG)) {
2825                 /* init IDLE_POLL_COUNT = 0x60 */
2826                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2827                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2828                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2829                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2830
2831                 /* init RLC PG Delay */
2832                 data = 0;
2833                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2834                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2835                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2836                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2837                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2838
2839                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2840                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2841                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2842                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2843
2844                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2845                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2846                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2847                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2848
2849                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2850                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2851
2852                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2853                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2854                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2855
2856                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2857         }
2858 }
2859
2860 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2861                                                 bool enable)
2862 {
2863         uint32_t data = 0;
2864         uint32_t default_data = 0;
2865
2866         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2867         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2868                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2869                              enable ? 1 : 0);
2870         if (default_data != data)
2871                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2872 }
2873
2874 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2875                                                 bool enable)
2876 {
2877         uint32_t data = 0;
2878         uint32_t default_data = 0;
2879
2880         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2881         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2882                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2883                              enable ? 1 : 0);
2884         if (default_data != data)
2885                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2886 }
2887
2888 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2889                                         bool enable)
2890 {
2891         uint32_t data = 0;
2892         uint32_t default_data = 0;
2893
2894         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2895         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2896                              CP_PG_DISABLE,
2897                              enable ? 0 : 1);
2898         if (default_data != data)
2899                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2900 }
2901
2902 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2903                                                 bool enable)
2904 {
2905         uint32_t data, default_data;
2906
2907         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2908         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2909                              GFX_POWER_GATING_ENABLE,
2910                              enable ? 1 : 0);
2911         if (default_data != data)
2912                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2913 }
2914
2915 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2916                                                 bool enable)
2917 {
2918         uint32_t data, default_data;
2919
2920         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2921         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2922                              GFX_PIPELINE_PG_ENABLE,
2923                              enable ? 1 : 0);
2924         if (default_data != data)
2925                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2926
2927         if (!enable)
2928                 /* read any GFX register to wake up GFX */
2929                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2930 }
2931
2932 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2933                                                        bool enable)
2934 {
2935         uint32_t data, default_data;
2936
2937         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2938         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2939                              STATIC_PER_CU_PG_ENABLE,
2940                              enable ? 1 : 0);
2941         if (default_data != data)
2942                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2943 }
2944
2945 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2946                                                 bool enable)
2947 {
2948         uint32_t data, default_data;
2949
2950         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2951         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2952                              DYN_PER_CU_PG_ENABLE,
2953                              enable ? 1 : 0);
2954         if (default_data != data)
2955                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2956 }
2957
2958 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2959 {
2960         gfx_v9_0_init_csb(adev);
2961
2962         /*
2963          * The RLC save/restore list is supported since RLC v2_1,
2964          * and it is needed by the gfxoff feature.
2965          */
2966         if (adev->gfx.rlc.is_rlc_v2_1) {
2967                 if (adev->asic_type == CHIP_VEGA12 ||
2968                     (adev->asic_type == CHIP_RAVEN &&
2969                      adev->rev_id >= 8))
2970                         gfx_v9_1_init_rlc_save_restore_list(adev);
2971                 gfx_v9_0_enable_save_restore_machine(adev);
2972         }
2973
2974         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2975                               AMD_PG_SUPPORT_GFX_SMG |
2976                               AMD_PG_SUPPORT_GFX_DMG |
2977                               AMD_PG_SUPPORT_CP |
2978                               AMD_PG_SUPPORT_GDS |
2979                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2980                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2981                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2982                 gfx_v9_0_init_gfx_power_gating(adev);
2983         }
2984 }
2985
2986 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2987 {
2988         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2989         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2990         gfx_v9_0_wait_for_rlc_serdes(adev);
2991 }
2992
2993 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2994 {
2995         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2996         udelay(50);
2997         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2998         udelay(50);
2999 }
3000
3001 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3002 {
3003 #ifdef AMDGPU_RLC_DEBUG_RETRY
3004         u32 rlc_ucode_ver;
3005 #endif
3006
3007         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3008         udelay(50);
3009
3010         /* APUs (e.g. Carrizo) enable the CP interrupt only after the CP is initialized */
3011         if (!(adev->flags & AMD_IS_APU)) {
3012                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3013                 udelay(50);
3014         }
3015
3016 #ifdef AMDGPU_RLC_DEBUG_RETRY
3017         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3018         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3019         if (rlc_ucode_ver == 0x108) {
3020                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3021                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3022                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3023                  * default is 0x9C4 to create a 100us interval */
3024                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3025                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3026                  * to disable the page fault retry interrupts, default is
3027                  * 0x100 (256) */
3028                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3029         }
3030 #endif
3031 }
3032
3033 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3034 {
3035         const struct rlc_firmware_header_v2_0 *hdr;
3036         const __le32 *fw_data;
3037         unsigned i, fw_size;
3038
3039         if (!adev->gfx.rlc_fw)
3040                 return -EINVAL;
3041
3042         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3043         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3044
3045         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3046                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3047         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3048
3049         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3050                         RLCG_UCODE_LOADING_START_ADDRESS);
3051         for (i = 0; i < fw_size; i++)
3052                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3053         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3054
3055         return 0;
3056 }
3057
3058 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3059 {
3060         int r;
3061
3062         if (amdgpu_sriov_vf(adev)) {
3063                 gfx_v9_0_init_csb(adev);
3064                 return 0;
3065         }
3066
3067         adev->gfx.rlc.funcs->stop(adev);
3068
3069         /* disable CG */
3070         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3071
3072         gfx_v9_0_init_pg(adev);
3073
3074         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3075                 /* legacy rlc firmware loading */
3076                 r = gfx_v9_0_rlc_load_microcode(adev);
3077                 if (r)
3078                         return r;
3079         }
3080
3081         switch (adev->asic_type) {
3082         case CHIP_RAVEN:
3083                 if (amdgpu_lbpw == 0)
3084                         gfx_v9_0_enable_lbpw(adev, false);
3085                 else
3086                         gfx_v9_0_enable_lbpw(adev, true);
3087                 break;
3088         case CHIP_VEGA20:
3089                 if (amdgpu_lbpw > 0)
3090                         gfx_v9_0_enable_lbpw(adev, true);
3091                 else
3092                         gfx_v9_0_enable_lbpw(adev, false);
3093                 break;
3094         default:
3095                 break;
3096         }
3097
3098         adev->gfx.rlc.funcs->start(adev);
3099
3100         return 0;
3101 }
3102
3103 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3104 {
3105         int i;
3106         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3107
3108         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3109         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3110         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3111         if (!enable) {
3112                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3113                         adev->gfx.gfx_ring[i].sched.ready = false;
3114         }
3115         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3116         udelay(50);
3117 }
3118
3119 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3120 {
3121         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3122         const struct gfx_firmware_header_v1_0 *ce_hdr;
3123         const struct gfx_firmware_header_v1_0 *me_hdr;
3124         const __le32 *fw_data;
3125         unsigned i, fw_size;
3126
3127         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3128                 return -EINVAL;
3129
3130         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3131                 adev->gfx.pfp_fw->data;
3132         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3133                 adev->gfx.ce_fw->data;
3134         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3135                 adev->gfx.me_fw->data;
3136
3137         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3138         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3139         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3140
3141         gfx_v9_0_cp_gfx_enable(adev, false);
3142
3143         /* PFP */
3144         fw_data = (const __le32 *)
3145                 (adev->gfx.pfp_fw->data +
3146                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3147         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3148         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3149         for (i = 0; i < fw_size; i++)
3150                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3151         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3152
3153         /* CE */
3154         fw_data = (const __le32 *)
3155                 (adev->gfx.ce_fw->data +
3156                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3157         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3158         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3159         for (i = 0; i < fw_size; i++)
3160                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3161         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3162
3163         /* ME */
3164         fw_data = (const __le32 *)
3165                 (adev->gfx.me_fw->data +
3166                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3167         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3168         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3169         for (i = 0; i < fw_size; i++)
3170                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3171         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3172
3173         return 0;
3174 }
3175
3176 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3177 {
3178         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3179         const struct cs_section_def *sect = NULL;
3180         const struct cs_extent_def *ext = NULL;
3181         int r, i, tmp;
3182
3183         /* init the CP */
3184         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3185         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3186
3187         gfx_v9_0_cp_gfx_enable(adev, true);
3188
3189         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3190         if (r) {
3191                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3192                 return r;
3193         }
3194
3195         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3196         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3197
3198         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3199         amdgpu_ring_write(ring, 0x80000000);
3200         amdgpu_ring_write(ring, 0x80000000);
3201
3202         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3203                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3204                         if (sect->id == SECT_CONTEXT) {
3205                                 amdgpu_ring_write(ring,
3206                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3207                                                ext->reg_count));
3208                                 amdgpu_ring_write(ring,
3209                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3210                                 for (i = 0; i < ext->reg_count; i++)
3211                                         amdgpu_ring_write(ring, ext->extent[i]);
3212                         }
3213                 }
3214         }
3215
3216         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3217         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3218
3219         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3220         amdgpu_ring_write(ring, 0);
3221
3222         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3223         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3224         amdgpu_ring_write(ring, 0x8000);
3225         amdgpu_ring_write(ring, 0x8000);
3226
3227         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3228         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3229                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3230         amdgpu_ring_write(ring, tmp);
3231         amdgpu_ring_write(ring, 0);
3232
3233         amdgpu_ring_commit(ring);
3234
3235         return 0;
3236 }
3237
3238 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3239 {
3240         struct amdgpu_ring *ring;
3241         u32 tmp;
3242         u32 rb_bufsz;
3243         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3244
3245         /* Set the write pointer delay */
3246         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3247
3248         /* set the RB to use vmid 0 */
3249         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3250
3251         /* Set ring buffer size */
3252         ring = &adev->gfx.gfx_ring[0];
3253         rb_bufsz = order_base_2(ring->ring_size / 8);
3254         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3255         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3256 #ifdef __BIG_ENDIAN
3257         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3258 #endif
3259         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3260
3261         /* Initialize the ring buffer's write pointers */
3262         ring->wptr = 0;
3263         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3264         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3265
3266         /* set the wb address whether it's enabled or not */
3267         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3268         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3269         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3270
3271         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3272         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3273         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3274
3275         mdelay(1);
3276         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3277
3278         rb_addr = ring->gpu_addr >> 8;
3279         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3280         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3281
3282         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3283         if (ring->use_doorbell) {
3284                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3285                                     DOORBELL_OFFSET, ring->doorbell_index);
3286                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3287                                     DOORBELL_EN, 1);
3288         } else {
3289                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3290         }
3291         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3292
3293         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3294                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3295         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3296
3297         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3298                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3299
3300
3301         /* start the ring */
3302         gfx_v9_0_cp_gfx_start(adev);
3303         ring->sched.ready = true;
3304
3305         return 0;
3306 }
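/*
 * Ring size encoding sketch (assumed 64 KiB ring): order_base_2(65536 / 8)
 * = 13, so RB_BUFSZ = 13 and RB_BLKSZ = 11 in CP_RB0_CNTL above; both are
 * log2-encoded quantities rather than byte counts.
 */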
3307
3308 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3309 {
3310         int i;
3311
3312         if (enable) {
3313                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3314         } else {
3315                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3316                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3317                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3318                         adev->gfx.compute_ring[i].sched.ready = false;
3319                 adev->gfx.kiq.ring.sched.ready = false;
3320         }
3321         udelay(50);
3322 }
3323
3324 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3325 {
3326         const struct gfx_firmware_header_v1_0 *mec_hdr;
3327         const __le32 *fw_data;
3328         unsigned i;
3329         u32 tmp;
3330
3331         if (!adev->gfx.mec_fw)
3332                 return -EINVAL;
3333
3334         gfx_v9_0_cp_compute_enable(adev, false);
3335
3336         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3337         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3338
3339         fw_data = (const __le32 *)
3340                 (adev->gfx.mec_fw->data +
3341                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3342         tmp = 0;
3343         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3344         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3345         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3346
3347         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3348                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3349         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3350                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3351
3352         /* MEC1 */
3353         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3354                          mec_hdr->jt_offset);
3355         for (i = 0; i < mec_hdr->jt_size; i++)
3356                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3357                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3358
3359         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3360                         adev->gfx.mec_fw_version);
3361         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3362
3363         return 0;
3364 }
3365
3366 /* KIQ functions */
3367 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3368 {
3369         uint32_t tmp;
3370         struct amdgpu_device *adev = ring->adev;
3371
3372         /* tell the RLC which queue is the KIQ */
3373         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3374         tmp &= 0xffffff00;
3375         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3376         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3377         tmp |= 0x80;
3378         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3379 }
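/*
 * Example encoding (hypothetical KIQ placement): me = 2, pipe = 1,
 * queue = 0 gives (2 << 5) | (1 << 3) | 0 = 0x48 in the low byte of
 * RLC_CP_SCHEDULERS; the second write then ORs in 0x80, presumably a
 * valid/active flag for the scheduler entry.
 */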
3380
3381 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3382 {
3383         struct amdgpu_device *adev = ring->adev;
3384
3385         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3386                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
3387                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3388                         ring->has_high_prio = true;
3389                         mqd->cp_hqd_queue_priority =
3390                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3391                 } else {
3392                         ring->has_high_prio = false;
3393                 }
3394         }
3395 }
3396
3397 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3398 {
3399         struct amdgpu_device *adev = ring->adev;
3400         struct v9_mqd *mqd = ring->mqd_ptr;
3401         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3402         uint32_t tmp;
3403
3404         mqd->header = 0xC0310800;
3405         mqd->compute_pipelinestat_enable = 0x00000001;
3406         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3407         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3408         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3409         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3410         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3411         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3412         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3413         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3414         mqd->compute_misc_reserved = 0x00000003;
3415
3416         mqd->dynamic_cu_mask_addr_lo =
3417                 lower_32_bits(ring->mqd_gpu_addr
3418                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3419         mqd->dynamic_cu_mask_addr_hi =
3420                 upper_32_bits(ring->mqd_gpu_addr
3421                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3422
3423         eop_base_addr = ring->eop_gpu_addr >> 8;
3424         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3425         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3426
3427         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3428         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3429         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3430                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3431
3432         mqd->cp_hqd_eop_control = tmp;
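        /*
         * Worked example: GFX9_MEC_HPD_SIZE is 4096 bytes = 1024 dwords;
         * order_base_2(1024) - 1 = 9, and per the comment above the
         * hardware reads that as 2^(9 + 1) = 1024 dwords, i.e. the whole
         * EOP buffer.
         */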
3433
3434         /* enable doorbell? */
3435         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3436
3437         if (ring->use_doorbell) {
3438                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3439                                     DOORBELL_OFFSET, ring->doorbell_index);
3440                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3441                                     DOORBELL_EN, 1);
3442                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3443                                     DOORBELL_SOURCE, 0);
3444                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3445                                     DOORBELL_HIT, 0);
3446         } else {
3447                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3448                                          DOORBELL_EN, 0);
3449         }
3450
3451         mqd->cp_hqd_pq_doorbell_control = tmp;
3452
3453         /* disable the queue if it's active */
3454         ring->wptr = 0;
3455         mqd->cp_hqd_dequeue_request = 0;
3456         mqd->cp_hqd_pq_rptr = 0;
3457         mqd->cp_hqd_pq_wptr_lo = 0;
3458         mqd->cp_hqd_pq_wptr_hi = 0;
3459
3460         /* set the pointer to the MQD */
3461         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3462         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3463
3464         /* set MQD vmid to 0 */
3465         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3466         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3467         mqd->cp_mqd_control = tmp;
3468
3469         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3470         hqd_gpu_addr = ring->gpu_addr >> 8;
3471         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3472         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3473
3474         /* set up the HQD, this is similar to CP_RB0_CNTL */
3475         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3476         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3477                             (order_base_2(ring->ring_size / 4) - 1));
3478         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3479                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3480 #ifdef __BIG_ENDIAN
3481         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3482 #endif
3483         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3484         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3485         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3486         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3487         mqd->cp_hqd_pq_control = tmp;
3488
3489         /* set the wb address whether it's enabled or not */
3490         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3491         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3492         mqd->cp_hqd_pq_rptr_report_addr_hi =
3493                 upper_32_bits(wb_gpu_addr) & 0xffff;
3494
3495         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3496         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3497         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3498         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3499
3500         tmp = 0;
3501         /* enable the doorbell if requested */
3502         if (ring->use_doorbell) {
3503                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3504                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3505                                 DOORBELL_OFFSET, ring->doorbell_index);
3506
3507                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3508                                          DOORBELL_EN, 1);
3509                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3510                                          DOORBELL_SOURCE, 0);
3511                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3512                                          DOORBELL_HIT, 0);
3513         }
3514
3515         mqd->cp_hqd_pq_doorbell_control = tmp;
3516
3517         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3518         ring->wptr = 0;
3519         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3520
3521         /* set the vmid for the queue */
3522         mqd->cp_hqd_vmid = 0;
3523
3524         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3525         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3526         mqd->cp_hqd_persistent_state = tmp;
3527
3528         /* set MIN_IB_AVAIL_SIZE */
3529         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3530         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3531         mqd->cp_hqd_ib_control = tmp;
3532
3533         /* set static priority for a queue/ring */
3534         gfx_v9_0_mqd_set_priority(ring, mqd);
3535         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
3536
3537         /* The map_queues packet doesn't need to activate the queue,
3538          * so only the KIQ needs to set this field.
3539          */
3540         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3541                 mqd->cp_hqd_active = 1;
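        /* KCQs are instead activated later via KIQ MAP_QUEUES packets;
         * see amdgpu_gfx_enable_kcq() called from gfx_v9_0_kcq_resume().
         */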
3542
3543         return 0;
3544 }
3545
3546 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3547 {
3548         struct amdgpu_device *adev = ring->adev;
3549         struct v9_mqd *mqd = ring->mqd_ptr;
3550         int j;
3551
3552         /* disable wptr polling */
3553         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3554
3555         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3556                mqd->cp_hqd_eop_base_addr_lo);
3557         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3558                mqd->cp_hqd_eop_base_addr_hi);
3559
3560         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3561         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3562                mqd->cp_hqd_eop_control);
3563
3564         /* enable doorbell? */
3565         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3566                mqd->cp_hqd_pq_doorbell_control);
3567
3568         /* disable the queue if it's active */
3569         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3570                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3571                 for (j = 0; j < adev->usec_timeout; j++) {
3572                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3573                                 break;
3574                         udelay(1);
3575                 }
3576                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3577                        mqd->cp_hqd_dequeue_request);
3578                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3579                        mqd->cp_hqd_pq_rptr);
3580                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3581                        mqd->cp_hqd_pq_wptr_lo);
3582                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3583                        mqd->cp_hqd_pq_wptr_hi);
3584         }
3585
3586         /* set the pointer to the MQD */
3587         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3588                mqd->cp_mqd_base_addr_lo);
3589         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3590                mqd->cp_mqd_base_addr_hi);
3591
3592         /* set MQD vmid to 0 */
3593         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3594                mqd->cp_mqd_control);
3595
3596         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3597         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3598                mqd->cp_hqd_pq_base_lo);
3599         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3600                mqd->cp_hqd_pq_base_hi);
3601
3602         /* set up the HQD, this is similar to CP_RB0_CNTL */
3603         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3604                mqd->cp_hqd_pq_control);
3605
3606         /* set the wb address whether it's enabled or not */
3607         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3608                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3609         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3610                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3611
3612         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3613         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3614                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3615         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3616                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3617
3618         /* enable the doorbell if requested */
3619         if (ring->use_doorbell) {
3620                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3621                                         (adev->doorbell_index.kiq * 2) << 2);
3622                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3623                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3624         }
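        /* The writes above bound the doorbell aperture the MEC will accept;
         * doorbell_index counts 64-bit slots, so index * 2 gives the 32-bit
         * slot and << 2 converts that to a byte offset (encoding assumed
         * from amdgpu's doorbell layout).
         */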
3625
3626         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3627                mqd->cp_hqd_pq_doorbell_control);
3628
3629         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3630         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3631                mqd->cp_hqd_pq_wptr_lo);
3632         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3633                mqd->cp_hqd_pq_wptr_hi);
3634
3635         /* set the vmid for the queue */
3636         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3637
3638         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3639                mqd->cp_hqd_persistent_state);
3640
3641         /* activate the queue */
3642         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3643                mqd->cp_hqd_active);
3644
3645         if (ring->use_doorbell)
3646                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3647
3648         return 0;
3649 }
3650
3651 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3652 {
3653         struct amdgpu_device *adev = ring->adev;
3654         int j;
3655
3656         /* disable the queue if it's active */
3657         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3658
3659                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3660
3661                 for (j = 0; j < adev->usec_timeout; j++) {
3662                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3663                                 break;
3664                         udelay(1);
3665                 }
3666
3667                 if (j == adev->usec_timeout) {
3668                         DRM_DEBUG("KIQ dequeue request failed.\n");
3669
3670                         /* Manual disable if dequeue request times out */
3671                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3672                 }
3673
3674                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3675                       0);
3676         }
3677
3678         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3679         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3680         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3681         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3682         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3683         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3684         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3685         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3686
3687         return 0;
3688 }
3689
3690 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3691 {
3692         struct amdgpu_device *adev = ring->adev;
3693         struct v9_mqd *mqd = ring->mqd_ptr;
3694         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3695
3696         gfx_v9_0_kiq_setting(ring);
3697
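        /* Two paths below: on GPU reset, restore the MQD from the CPU-side
         * backup instead of rebuilding it; on first init, build the MQD from
         * scratch and save a backup copy for later resets.
         */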
3698         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3699                 /* reset MQD to a clean status */
3700                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3701                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3702
3703                 /* reset ring buffer */
3704                 ring->wptr = 0;
3705                 amdgpu_ring_clear_ring(ring);
3706
3707                 mutex_lock(&adev->srbm_mutex);
3708                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3709                 gfx_v9_0_kiq_init_register(ring);
3710                 soc15_grbm_select(adev, 0, 0, 0, 0);
3711                 mutex_unlock(&adev->srbm_mutex);
3712         } else {
3713                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3714                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3715                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3716                 mutex_lock(&adev->srbm_mutex);
3717                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3718                 gfx_v9_0_mqd_init(ring);
3719                 gfx_v9_0_kiq_init_register(ring);
3720                 soc15_grbm_select(adev, 0, 0, 0, 0);
3721                 mutex_unlock(&adev->srbm_mutex);
3722
3723                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3724                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3725         }
3726
3727         return 0;
3728 }
3729
3730 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3731 {
3732         struct amdgpu_device *adev = ring->adev;
3733         struct v9_mqd *mqd = ring->mqd_ptr;
3734         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3735
3736         if (!adev->in_gpu_reset && !adev->in_suspend) {
3737                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3738                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3739                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3740                 mutex_lock(&adev->srbm_mutex);
3741                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3742                 gfx_v9_0_mqd_init(ring);
3743                 soc15_grbm_select(adev, 0, 0, 0, 0);
3744                 mutex_unlock(&adev->srbm_mutex);
3745
3746                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3747                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3748         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3749                 /* reset MQD to a clean status */
3750                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3751                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3752
3753                 /* reset ring buffer */
3754                 ring->wptr = 0;
3755                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3756                 amdgpu_ring_clear_ring(ring);
3757         } else {
3758                 amdgpu_ring_clear_ring(ring);
3759         }
3760
3761         return 0;
3762 }
3763
3764 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3765 {
3766         struct amdgpu_ring *ring;
3767         int r;
3768
3769         ring = &adev->gfx.kiq.ring;
3770
3771         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3772         if (unlikely(r != 0))
3773                 return r;
3774
3775         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3776         if (unlikely(r != 0))
3777                 return r;
3778
3779         gfx_v9_0_kiq_init_queue(ring);
3780         amdgpu_bo_kunmap(ring->mqd_obj);
3781         ring->mqd_ptr = NULL;
3782         amdgpu_bo_unreserve(ring->mqd_obj);
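        /* The KIQ ring is not exercised by amdgpu_ring_test_helper() like the
         * GFX/compute rings in gfx_v9_0_cp_resume(), so mark it ready by hand.
         */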
3783         ring->sched.ready = true;
3784         return 0;
3785 }
3786
3787 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3788 {
3789         struct amdgpu_ring *ring = NULL;
3790         int r = 0, i;
3791
3792         gfx_v9_0_cp_compute_enable(adev, true);
3793
3794         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3795                 ring = &adev->gfx.compute_ring[i];
3796
3797                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3798                 if (unlikely(r != 0))
3799                         goto done;
3800                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3801                 if (!r) {
3802                         r = gfx_v9_0_kcq_init_queue(ring);
3803                         amdgpu_bo_kunmap(ring->mqd_obj);
3804                         ring->mqd_ptr = NULL;
3805                 }
3806                 amdgpu_bo_unreserve(ring->mqd_obj);
3807                 if (r)
3808                         goto done;
3809         }
3810
3811         r = amdgpu_gfx_enable_kcq(adev);
3812 done:
3813         return r;
3814 }
3815
3816 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3817 {
3818         int r, i;
3819         struct amdgpu_ring *ring;
3820
3821         if (!(adev->flags & AMD_IS_APU))
3822                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3823
3824         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3825                 if (adev->asic_type != CHIP_ARCTURUS) {
3826                         /* legacy firmware loading */
3827                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3828                         if (r)
3829                                 return r;
3830                 }
3831
3832                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3833                 if (r)
3834                         return r;
3835         }
3836
3837         r = gfx_v9_0_kiq_resume(adev);
3838         if (r)
3839                 return r;
3840
3841         if (adev->asic_type != CHIP_ARCTURUS) {
3842                 r = gfx_v9_0_cp_gfx_resume(adev);
3843                 if (r)
3844                         return r;
3845         }
3846
3847         r = gfx_v9_0_kcq_resume(adev);
3848         if (r)
3849                 return r;
3850
3851         if (adev->asic_type != CHIP_ARCTURUS) {
3852                 ring = &adev->gfx.gfx_ring[0];
3853                 r = amdgpu_ring_test_helper(ring);
3854                 if (r)
3855                         return r;
3856         }
3857
3858         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3859                 ring = &adev->gfx.compute_ring[i];
3860                 amdgpu_ring_test_helper(ring);
3861         }
3862
3863         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3864
3865         return 0;
3866 }
3867
3868 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3869 {
3870         u32 tmp;
3871
3872         if (adev->asic_type != CHIP_ARCTURUS)
3873                 return;
3874
3875         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3876         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3877                                 adev->df.hash_status.hash_64k);
3878         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3879                                 adev->df.hash_status.hash_2m);
3880         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3881                                 adev->df.hash_status.hash_1g);
3882         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3883 }
3884
3885 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3886 {
3887         if (adev->asic_type != CHIP_ARCTURUS)
3888                 gfx_v9_0_cp_gfx_enable(adev, enable);
3889         gfx_v9_0_cp_compute_enable(adev, enable);
3890 }
3891
3892 static int gfx_v9_0_hw_init(void *handle)
3893 {
3894         int r;
3895         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3896
3897         if (!amdgpu_sriov_vf(adev))
3898                 gfx_v9_0_init_golden_registers(adev);
3899
3900         gfx_v9_0_constants_init(adev);
3901
3902         gfx_v9_0_init_tcp_config(adev);
3903
3904         r = adev->gfx.rlc.funcs->resume(adev);
3905         if (r)
3906                 return r;
3907
3908         r = gfx_v9_0_cp_resume(adev);
3909         if (r)
3910                 return r;
3911 
3912         return 0;
3913 }
3914
3915 static int gfx_v9_0_hw_fini(void *handle)
3916 {
3917         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3918
3919         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3920         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3921         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3922
3923         /* DF freeze and KCQ disable will fail if a RAS interrupt was triggered */
3924         if (!amdgpu_ras_intr_triggered())
3925                 /* disable KCQs so the CPC stops touching memory that is no longer valid */
3926                 amdgpu_gfx_disable_kcq(adev);
3927
3928         if (amdgpu_sriov_vf(adev)) {
3929                 gfx_v9_0_cp_gfx_enable(adev, false);
3930                 /* must disable polling for SRIOV once hw is finished, otherwise
3931                  * the CPC engine may keep fetching the WB address, which is no
3932                  * longer valid after sw is finished, and trigger DMAR read
3933                  * errors on the hypervisor side.
3934                  */
3935                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3936                 return 0;
3937         }
3938
3939         /* Use the deinitialize sequence from CAIL when unbinding the device
3940          * from the driver, otherwise the KIQ hangs when binding it back.
3941          */
3942         if (!adev->in_gpu_reset && !adev->in_suspend) {
3943                 mutex_lock(&adev->srbm_mutex);
3944                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3945                                 adev->gfx.kiq.ring.pipe,
3946                                 adev->gfx.kiq.ring.queue, 0);
3947                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3948                 soc15_grbm_select(adev, 0, 0, 0, 0);
3949                 mutex_unlock(&adev->srbm_mutex);
3950         }
3951
3952         gfx_v9_0_cp_enable(adev, false);
3953         adev->gfx.rlc.funcs->stop(adev);
3954
3955         return 0;
3956 }
3957
3958 static int gfx_v9_0_suspend(void *handle)
3959 {
3960         return gfx_v9_0_hw_fini(handle);
3961 }
3962
3963 static int gfx_v9_0_resume(void *handle)
3964 {
3965         return gfx_v9_0_hw_init(handle);
3966 }
3967
3968 static bool gfx_v9_0_is_idle(void *handle)
3969 {
3970         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3971
3972         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3973                                 GRBM_STATUS, GUI_ACTIVE))
3974                 return false;
3975         else
3976                 return true;
3977 }
3978
3979 static int gfx_v9_0_wait_for_idle(void *handle)
3980 {
3981         unsigned i;
3982         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3983
3984         for (i = 0; i < adev->usec_timeout; i++) {
3985                 if (gfx_v9_0_is_idle(handle))
3986                         return 0;
3987                 udelay(1);
3988         }
3989         return -ETIMEDOUT;
3990 }
3991
3992 static int gfx_v9_0_soft_reset(void *handle)
3993 {
3994         u32 grbm_soft_reset = 0;
3995         u32 tmp;
3996         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3997
3998         /* GRBM_STATUS */
3999         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4000         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4001                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4002                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4003                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4004                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4005                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4006                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4007                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4008                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4009                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4010         }
4011
4012         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4013                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4014                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4015         }
4016
4017         /* GRBM_STATUS2 */
4018         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4019         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4020                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4021                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4022 
4024         if (grbm_soft_reset) {
4025                 /* stop the rlc */
4026                 adev->gfx.rlc.funcs->stop(adev);
4027
4028                 if (adev->asic_type != CHIP_ARCTURUS)
4029                         /* Disable GFX parsing/prefetching */
4030                         gfx_v9_0_cp_gfx_enable(adev, false);
4031
4032                 /* Disable MEC parsing/prefetching */
4033                 gfx_v9_0_cp_compute_enable(adev, false);
4034
4035                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4036                 tmp |= grbm_soft_reset;
4037                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4038                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4039                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4040 
4041                 udelay(50);
4042 
4043                 tmp &= ~grbm_soft_reset;
4044                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4045                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4048
4049                 /* Wait a little for things to settle down */
4050                 udelay(50);
4051         }
4052         return 0;
4053 }
4054
4055 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4056 {
4057         signed long r, cnt = 0;
4058         unsigned long flags;
4059         uint32_t seq;
4060         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4061         struct amdgpu_ring *ring = &kiq->ring;
4062
4063         BUG_ON(!ring->funcs->emit_rreg);
4064
4065         spin_lock_irqsave(&kiq->ring_lock, flags);
4066         amdgpu_ring_alloc(ring, 32);
4067         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4068         amdgpu_ring_write(ring, 9 |     /* src: register */
4069                                 (5 << 8) |      /* dst: memory */
4070                                 (1 << 16) |     /* count sel */
4071                                 (1 << 20));     /* write confirm */
4072         amdgpu_ring_write(ring, 0);
4073         amdgpu_ring_write(ring, 0);
4074         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4075                                 kiq->reg_val_offs * 4));
4076         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4077                                 kiq->reg_val_offs * 4));
4078         amdgpu_fence_emit_polling(ring, &seq);
4079         amdgpu_ring_commit(ring);
4080         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4081
4082         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4083
4084         /* don't wait any longer in the GPU reset case, because doing so may
4085          * block the gpu_recover() routine forever; e.g. when this virt_kiq_rreg
4086          * is triggered from TTM, ttm_bo_lock_delayed_workqueue() will never
4087          * return if we keep waiting in virt_kiq_rreg, which makes gpu_recover()
4088          * hang there.
4089          *
4090          * also don't wait any longer in IRQ context
4091          */
4092         if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
4093                 goto failed_kiq_read;
4094
4095         might_sleep();
4096         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4097                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4098                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4099         }
4100
4101         if (cnt > MAX_KIQ_REG_TRY)
4102                 goto failed_kiq_read;
4103
4104         return (uint64_t)adev->wb.wb[kiq->reg_val_offs] |
4105                 (uint64_t)adev->wb.wb[kiq->reg_val_offs + 1] << 32ULL;
4106
4107 failed_kiq_read:
4108         pr_err("failed to read gpu clock\n");
4109         return ~0;
4110 }
4111
4112 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4113 {
4114         uint64_t clock;
4115
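        /* Hold the GFX block out of GFXOFF while sampling the counter; the
         * RLC clock-count registers are not accessible while the block is
         * power gated.
         */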
4116         amdgpu_gfx_off_ctrl(adev, false);
4117         mutex_lock(&adev->gfx.gpu_clock_mutex);
4118         if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
4119                 clock = gfx_v9_0_kiq_read_clock(adev);
4120         } else {
4121                 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4122                 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4123                         ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4124         }
4125         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4126         amdgpu_gfx_off_ctrl(adev, true);
4127         return clock;
4128 }
4129
4130 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4131                                           uint32_t vmid,
4132                                           uint32_t gds_base, uint32_t gds_size,
4133                                           uint32_t gws_base, uint32_t gws_size,
4134                                           uint32_t oa_base, uint32_t oa_size)
4135 {
4136         struct amdgpu_device *adev = ring->adev;
4137
4138         /* GDS Base */
4139         gfx_v9_0_write_data_to_reg(ring, 0, false,
4140                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4141                                    gds_base);
4142
4143         /* GDS Size */
4144         gfx_v9_0_write_data_to_reg(ring, 0, false,
4145                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4146                                    gds_size);
4147
4148         /* GWS */
4149         gfx_v9_0_write_data_to_reg(ring, 0, false,
4150                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4151                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4152
4153         /* OA */
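        /* (1 << (oa_size + oa_base)) - (1 << oa_base) builds a mask of
         * oa_size consecutive bits starting at bit oa_base, e.g. base 4
         * and size 4 give 0xf0.
         */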
4154         gfx_v9_0_write_data_to_reg(ring, 0, false,
4155                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4156                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4157 }
4158
4159 static const u32 vgpr_init_compute_shader[] =
4160 {
4161         0xb07c0000, 0xbe8000ff,
4162         0x000000f8, 0xbf110800,
4163         0x7e000280, 0x7e020280,
4164         0x7e040280, 0x7e060280,
4165         0x7e080280, 0x7e0a0280,
4166         0x7e0c0280, 0x7e0e0280,
4167         0x80808800, 0xbe803200,
4168         0xbf84fff5, 0xbf9c0000,
4169         0xd28c0001, 0x0001007f,
4170         0xd28d0001, 0x0002027e,
4171         0x10020288, 0xb8810904,
4172         0xb7814000, 0xd1196a01,
4173         0x00000301, 0xbe800087,
4174         0xbefc00c1, 0xd89c4000,
4175         0x00020201, 0xd89cc080,
4176         0x00040401, 0x320202ff,
4177         0x00000800, 0x80808100,
4178         0xbf84fff8, 0x7e020280,
4179         0xbf810000, 0x00000000,
4180 };
4181
4182 static const u32 sgpr_init_compute_shader[] =
4183 {
4184         0xb07c0000, 0xbe8000ff,
4185         0x0000005f, 0xbee50080,
4186         0xbe812c65, 0xbe822c65,
4187         0xbe832c65, 0xbe842c65,
4188         0xbe852c65, 0xb77c0005,
4189         0x80808500, 0xbf84fff8,
4190         0xbe800080, 0xbf810000,
4191 };
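/* The hand-assembled shaders above and below write every VGPR/SGPR so the
 * register files end up in a known state; gfx_v9_0_do_edc_gpr_workarounds()
 * dispatches them on RAS-enabled boards, presumably to initialize the GPRs'
 * ECC state before error counting starts.
 */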
4192
4193 static const u32 vgpr_init_compute_shader_arcturus[] = {
4194         0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4195         0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4196         0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4197         0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4198         0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4199         0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4200         0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4201         0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4202         0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4203         0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4204         0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4205         0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4206         0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4207         0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4208         0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4209         0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4210         0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4211         0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4212         0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4213         0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4214         0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4215         0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4216         0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4217         0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4218         0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4219         0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4220         0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4221         0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4222         0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4223         0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4224         0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4225         0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4226         0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4227         0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4228         0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4229         0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4230         0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4231         0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4232         0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4233         0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4234         0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4235         0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4236         0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4237         0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4238         0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4239         0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4240         0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4241         0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4242         0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4243         0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4244         0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4245         0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4246         0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4247         0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4248         0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4249         0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4250         0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4251         0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4252         0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4253         0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4254         0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4255         0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4256         0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4257         0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4258         0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4259         0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4260         0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4261         0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4262         0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4263         0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4264         0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4265         0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4266         0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4267         0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4268         0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4269         0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4270         0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4271         0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4272         0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4273         0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4274         0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4275         0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4276         0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4277         0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4278         0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4279         0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4280         0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4281         0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4282         0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4283         0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4284         0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4285         0xbf84fff8, 0xbf810000,
4286 };
4287
4288 /* When the register arrays below change, please update gpr_reg_size
4289  * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4290  * to cover all gfx9 ASICs */
4291 static const struct soc15_reg_entry vgpr_init_regs[] = {
4292    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4293    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4294    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4295    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4296    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4297    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4298    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4299    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4300    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4301    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4302    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4303    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4304    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4305    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4306 };
4307
4308 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4309    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4310    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4311    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4312    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4313    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4314    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4315    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4316    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4317    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4318    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4319    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4320    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4321    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4322    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4323 };
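/* Identical to vgpr_init_regs except for mmCOMPUTE_PGM_RSRC1 (0xbf vs 0x3f),
 * presumably to cover the larger Arcturus register file touched by the
 * accumulator-VGPR writes in the shader above.
 */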
4324
4325 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4326    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4327    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4328    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4329    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4330    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4331    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4332    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4333    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4334    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4335    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4336    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4337    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4338    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4339    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4340 };
4341
4342 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4343    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4344    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4345    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4346    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4347    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4348    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4349    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4350    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4351    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4352    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4353    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4354    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4355    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4356    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4357 };
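/* sgpr1_init_regs and sgpr2_init_regs differ only in the static thread
 * management masks (0x000000ff vs 0x0000ff00), i.e. CUs 0-7 vs CUs 8-15 of
 * each shader engine, so the two dispatches together cover all CUs.
 */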
4358
4359 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4360    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4361    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4362    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4363    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4364    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4365    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4366    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4367    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4368    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4369    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4370    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4371    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4372    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4373    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4374    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4375    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4376    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4377    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4378    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4379    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4380    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4381    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4382    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4383    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4384    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4385    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4386    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4387    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4388    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4389    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4390    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4391    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4392    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4393 };
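/* The trailing numbers in each entry are the reg_value plus the number of
 * SE and instance copies to iterate over when reading that counter (field
 * meanings assumed from struct soc15_reg_entry).
 */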
4394
4395 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4396 {
4397         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4398         int i, r;
4399
4400         /* only support when RAS is enabled */
4401         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4402                 return 0;
4403
4404         r = amdgpu_ring_alloc(ring, 7);
4405         if (r) {
4406                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4407                         ring->name, r);
4408                 return r;
4409         }
4410
4411         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4412         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4413
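        /* Clear the whole GDS with a single CP DMA_DATA packet: SRC_SEL(2)
         * selects immediate data (the zeros written below), DST_SEL(1)
         * selects GDS (selector values assumed from the PM4 packet spec),
         * and the command dword carries the full gds_size as byte count.
         */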
4414         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4415         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4416                                 PACKET3_DMA_DATA_DST_SEL(1) |
4417                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4418                                 PACKET3_DMA_DATA_ENGINE(0)));
4419         amdgpu_ring_write(ring, 0);
4420         amdgpu_ring_write(ring, 0);
4421         amdgpu_ring_write(ring, 0);
4422         amdgpu_ring_write(ring, 0);
4423         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4424                                 adev->gds.gds_size);
4425
4426         amdgpu_ring_commit(ring);
4427
4428         for (i = 0; i < adev->usec_timeout; i++) {
4429                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4430                         break;
4431                 udelay(1);
4432         }
4433
4434         if (i >= adev->usec_timeout)
4435                 r = -ETIMEDOUT;
4436
4437         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4438
4439         return r;
4440 }
4441
4442 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4443 {
4444         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4445         struct amdgpu_ib ib;
4446         struct dma_fence *f = NULL;
4447         int r, i;
4448         unsigned total_size, vgpr_offset, sgpr_offset;
4449         u64 gpu_addr;
4450
4451         int compute_dim_x = adev->gfx.config.max_shader_engines *
4452                                                 adev->gfx.config.max_cu_per_sh *
4453                                                 adev->gfx.config.max_sh_per_se;
4454         int sgpr_work_group_size = 5;
4455         int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4456         int vgpr_init_shader_size;
4457         const u32 *vgpr_init_shader_ptr;
4458         const struct soc15_reg_entry *vgpr_init_regs_ptr;
4459
4460         /* only support when RAS is enabled */
4461         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4462                 return 0;
4463
4464         /* bail if the compute ring is not ready */
4465         if (!ring->sched.ready)
4466                 return 0;
4467
4468         if (adev->asic_type == CHIP_ARCTURUS) {
4469                 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4470                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4471                 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4472         } else {
4473                 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4474                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4475                 vgpr_init_regs_ptr = vgpr_init_regs;
4476         }
4477
4478         total_size =
4479                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4480         total_size +=
4481                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4482         total_size +=
4483                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4484         total_size = ALIGN(total_size, 256);
4485         vgpr_offset = total_size;
4486         total_size += ALIGN(vgpr_init_shader_size, 256);
4487         sgpr_offset = total_size;
4488         total_size += sizeof(sgpr_init_compute_shader);
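        /* IB layout: the PM4 packets for the three dispatches come first,
         * then the VGPR shader at the 256-byte aligned vgpr_offset, then
         * the SGPR shader at sgpr_offset; COMPUTE_PGM_LO/HI below take
         * those addresses shifted right by 8 (256-byte units).
         */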
4489
4490         /* allocate an indirect buffer to put the commands in */
4491         memset(&ib, 0, sizeof(ib));
4492         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4493         if (r) {
4494                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4495                 return r;
4496         }
4497
4498         /* load the compute shaders */
4499         for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4500                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4501
4502         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4503                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4504
4505         /* init the ib length to 0 */
4506         ib.length_dw = 0;
4507
4508         /* VGPR */
4509         /* write the register state for the compute dispatch */
4510         for (i = 0; i < gpr_reg_size; i++) {
4511                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4512                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4513                                                                 - PACKET3_SET_SH_REG_START;
4514                 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4515         }
4516         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4517         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4518         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4519         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4520                                                         - PACKET3_SET_SH_REG_START;
4521         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4522         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4523
4524         /* write dispatch packet */
4525         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4526         ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4527         ib.ptr[ib.length_dw++] = 1; /* y */
4528         ib.ptr[ib.length_dw++] = 1; /* z */
4529         ib.ptr[ib.length_dw++] =
4530                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4531
4532         /* write CS partial flush packet */
4533         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4534         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4535
4536         /* SGPR1 */
4537         /* write the register state for the compute dispatch */
4538         for (i = 0; i < gpr_reg_size; i++) {
4539                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4540                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4541                                                                 - PACKET3_SET_SH_REG_START;
4542                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4543         }
4544         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4545         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4546         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4547         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4548                                                         - PACKET3_SET_SH_REG_START;
4549         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4550         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4551
4552         /* write dispatch packet */
4553         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4554         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4555         ib.ptr[ib.length_dw++] = 1; /* y */
4556         ib.ptr[ib.length_dw++] = 1; /* z */
4557         ib.ptr[ib.length_dw++] =
4558                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4559
4560         /* write CS partial flush packet */
4561         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4562         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4563
4564         /* SGPR2 */
4565         /* write the register state for the compute dispatch */
4566         for (i = 0; i < gpr_reg_size; i++) {
4567                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4568                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4569                                                                 - PACKET3_SET_SH_REG_START;
4570                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4571         }
4572         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4573         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4574         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4575         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4576                                                         - PACKET3_SET_SH_REG_START;
4577         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4578         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4579
4580         /* write dispatch packet */
4581         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4582         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4583         ib.ptr[ib.length_dw++] = 1; /* y */
4584         ib.ptr[ib.length_dw++] = 1; /* z */
4585         ib.ptr[ib.length_dw++] =
4586                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4587
4588         /* write CS partial flush packet */
4589         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4590         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4591
4592         /* schedule the ib on the ring */
4593         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4594         if (r) {
4595                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4596                 goto fail;
4597         }
4598
4599         /* wait for the GPU to finish processing the IB */
4600         r = dma_fence_wait(f, false);
4601         if (r) {
4602                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4603                 goto fail;
4604         }
4605
4606 fail:
4607         amdgpu_ib_free(adev, &ib, NULL);
4608         dma_fence_put(f);
4609
4610         return r;
4611 }
4612
4613 static int gfx_v9_0_early_init(void *handle)
4614 {
4615         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4616
4617         if (adev->asic_type == CHIP_ARCTURUS)
4618                 adev->gfx.num_gfx_rings = 0;
4619         else
4620                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4621         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4622         gfx_v9_0_set_kiq_pm4_funcs(adev);
4623         gfx_v9_0_set_ring_funcs(adev);
4624         gfx_v9_0_set_irq_funcs(adev);
4625         gfx_v9_0_set_gds_init(adev);
4626         gfx_v9_0_set_rlc_funcs(adev);
4627
4628         return 0;
4629 }
4630
4631 static int gfx_v9_0_ecc_late_init(void *handle)
4632 {
4633         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4634         int r;
4635
4636         /*
4637          * Temporary workaround for an issue where CP firmware fails to
4638          * update the read pointer while CPDMA writes a clearing operation
4639          * to GDS in the suspend/resume sequence on several cards. So just
4640          * limit this operation to the cold boot sequence.
4641          */
4642         if (!adev->in_suspend) {
4643                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4644                 if (r)
4645                         return r;
4646         }
4647
4648         /* requires IBs so do in late init after IB pool is initialized */
4649         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4650         if (r)
4651                 return r;
4652
4653         if (adev->gfx.funcs &&
4654             adev->gfx.funcs->reset_ras_error_count)
4655                 adev->gfx.funcs->reset_ras_error_count(adev);
4656
4657         r = amdgpu_gfx_ras_late_init(adev);
4658         if (r)
4659                 return r;
4660
4661         return 0;
4662 }
4663
4664 static int gfx_v9_0_late_init(void *handle)
4665 {
4666         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4667         int r;
4668
4669         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4670         if (r)
4671                 return r;
4672
4673         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4674         if (r)
4675                 return r;
4676
4677         r = gfx_v9_0_ecc_late_init(handle);
4678         if (r)
4679                 return r;
4680
4681         return 0;
4682 }
4683
4684 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4685 {
4686         uint32_t rlc_setting;
4687
4688         /* if RLC is not enabled, do nothing */
4689         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4690         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4691                 return false;
4692
4693         return true;
4694 }
4695
4696 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4697 {
4698         uint32_t data;
4699         unsigned i;
4700
4701         data = RLC_SAFE_MODE__CMD_MASK;
4702         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4703         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4704
4705         /* wait for RLC_SAFE_MODE */
4706         for (i = 0; i < adev->usec_timeout; i++) {
4707                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4708                         break;
4709                 udelay(1);
4710         }
4711 }
4712
4713 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4714 {
4715         uint32_t data;
4716
4717         data = RLC_SAFE_MODE__CMD_MASK;
4718         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4719 }
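/*
 * The two helpers above implement the RLC safe-mode handshake: the driver
 * sets the CMD bit together with a MESSAGE value (1 apparently meaning
 * "enter safe mode"), and the RLC firmware acknowledges by clearing CMD,
 * which the loop polls for up to adev->usec_timeout microseconds, e.g.:
 *
 *   WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE,
 *                RLC_SAFE_MODE__CMD_MASK |
 *                (1 << RLC_SAFE_MODE__MESSAGE__SHIFT));
 *   // ...poll until REG_GET_FIELD(..., RLC_SAFE_MODE, CMD) reads 0...
 *
 * Exiting safe mode writes CMD alone (MESSAGE = 0) and does not wait.
 */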
4720
4721 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4722                                                 bool enable)
4723 {
4724         amdgpu_gfx_rlc_enter_safe_mode(adev);
4725
4726         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4727                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4728                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4729                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4730         } else {
4731                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4732                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4733                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4734         }
4735
4736         amdgpu_gfx_rlc_exit_safe_mode(adev);
4737 }
4738
4739 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4740                                                 bool enable)
4741 {
4742         /* TODO: double check if we need to perform under safe mode */
4743         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4744
4745         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4746                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4747         else
4748                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4749
4750         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4751                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4752         else
4753                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4754
4755         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4756 }
4757
4758 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4759                                                       bool enable)
4760 {
4761         uint32_t data, def;
4762
4763         amdgpu_gfx_rlc_enter_safe_mode(adev);
4764
4765         /* It is disabled by HW by default */
4766         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4767                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4768                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4769
4770                 if (adev->asic_type != CHIP_VEGA12)
4771                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4772
4773                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4774                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4775                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4776
4777                 /* only for Vega10 & Raven1 */
4778                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4779
4780                 if (def != data)
4781                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4782
4783                 /* MGLS is a global flag to control all MGLS in GFX */
4784                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4785                         /* 2 - RLC memory Light sleep */
4786                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4787                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4788                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4789                                 if (def != data)
4790                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4791                         }
4792                         /* 3 - CP memory Light sleep */
4793                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4794                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4795                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4796                                 if (def != data)
4797                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4798                         }
4799                 }
4800         } else {
4801                 /* 1 - MGCG_OVERRIDE */
4802                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4803
4804                 if (adev->asic_type != CHIP_VEGA12)
4805                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4806
4807                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4808                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4809                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4810                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4811
4812                 if (def != data)
4813                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4814
4815                 /* 2 - disable MGLS in RLC */
4816                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4817                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4818                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4819                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4820                 }
4821
4822                 /* 3 - disable MGLS in CP */
4823                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4824                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4825                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4826                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4827                 }
4828         }
4829
4830         amdgpu_gfx_rlc_exit_safe_mode(adev);
4831 }
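/*
 * The def/data idiom used throughout these clock-gating routines is a
 * read-modify-write that skips the register write when nothing changed:
 *
 *   def = data = RREG32_SOC15(GC, 0, mmSOME_REG);
 *   data &= ~SOME_FIELD_MASK;          // or data |= ...
 *   if (def != data)
 *           WREG32_SOC15(GC, 0, mmSOME_REG, data);
 *
 * mmSOME_REG/SOME_FIELD_MASK are placeholders here, not real registers.
 */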
4832
4833 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4834                                            bool enable)
4835 {
4836         uint32_t data, def;
4837
4838         if (adev->asic_type == CHIP_ARCTURUS)
4839                 return;
4840
4841         amdgpu_gfx_rlc_enter_safe_mode(adev);
4842
4843         /* Enable 3D CGCG/CGLS */
4844         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4845                 /* write cmd to clear cgcg/cgls ov */
4846                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4847                 /* unset CGCG override */
4848                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4849                 /* update CGCG and CGLS override bits */
4850                 if (def != data)
4851                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4852
4853                 /* enable 3Dcgcg FSM(0x0000363f) */
4854                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4855
4856                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4857                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4858                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4859                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4860                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4861                 if (def != data)
4862                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4863
4864                 /* set IDLE_POLL_COUNT(0x00900100) */
4865                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4866                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4867                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4868                 if (def != data)
4869                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4870         } else {
4871                 /* Disable CGCG/CGLS */
4872                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4873                 /* disable cgcg, cgls should be disabled */
4874                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4875                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4876                 /* disable cgcg and cgls in FSM */
4877                 if (def != data)
4878                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4879         }
4880
4881         amdgpu_gfx_rlc_exit_safe_mode(adev);
4882 }
4883
4884 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4885                                                       bool enable)
4886 {
4887         uint32_t def, data;
4888
4889         amdgpu_gfx_rlc_enter_safe_mode(adev);
4890
4891         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4892                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4893                 /* unset CGCG override */
4894                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4895                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4896                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4897                 else
4898                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4899                 /* update CGCG and CGLS override bits */
4900                 if (def != data)
4901                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4902
4903                 /* enable cgcg FSM(0x0000363F) */
4904                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4905
4906                 if (adev->asic_type == CHIP_ARCTURUS)
4907                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4908                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4909                 else
4910                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4911                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4912                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4913                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4914                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4915                 if (def != data)
4916                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4917
4918                 /* set IDLE_POLL_COUNT(0x00900100) */
4919                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4920                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4921                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4922                 if (def != data)
4923                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4924         } else {
4925                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4926                 /* reset CGCG/CGLS bits */
4927                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4928                 /* disable cgcg and cgls in FSM */
4929                 if (def != data)
4930                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4931         }
4932
4933         amdgpu_gfx_rlc_exit_safe_mode(adev);
4934 }
4935
4936 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4937                                             bool enable)
4938 {
4939         if (enable) {
4940                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4941                  * ===  MGCG + MGLS ===
4942                  */
4943                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4944                 /* ===  CGCG /CGLS for GFX 3D Only === */
4945                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4946                 /* ===  CGCG + CGLS === */
4947                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4948         } else {
4949                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4950                  * ===  CGCG + CGLS ===
4951                  */
4952                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4953                 /* ===  CGCG /CGLS for GFX 3D Only === */
4954                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4955                 /* ===  MGCG + MGLS === */
4956                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4957         }
4958         return 0;
4959 }
4960
4961 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4962 {
4963         u32 data;
4964
4965         data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
4966
4967         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4968         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4969
4970         WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4971 }
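/*
 * The helper above does a read-modify-write of the RLC_SPM_VMID field so
 * that the other RLC_SPM_MC_CNTL bits are preserved. As a sketch, assuming
 * a 4-bit field at bit 0 (hypothetical layout): vmid = 5 would give
 * data = (data & ~0xF) | 0x5.
 */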
4972
4973 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4974                                         uint32_t offset,
4975                                         struct soc15_reg_rlcg *entries, int arr_size)
4976 {
4977         int i;
4978         uint32_t reg;
4979
4980         if (!entries)
4981                 return false;
4982
4983         for (i = 0; i < arr_size; i++) {
4984                 const struct soc15_reg_rlcg *entry;
4985
4986                 entry = &entries[i];
4987                 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
4988                 if (offset == reg)
4989                         return true;
4990         }
4991
4992         return false;
4993 }
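/*
 * A linear scan is enough here since the RLCG whitelist is small. Each
 * entry is resolved to an absolute dword offset (the IP base for its
 * hwip/instance/segment plus the register offset) and compared against the
 * requested one; gfx_v9_0_is_rlcg_access_range() below applies this to the
 * rlcg_access_gc_9_0 table.
 */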
4994
4995 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
4996 {
4997         return gfx_v9_0_check_rlcg_range(adev, offset,
4998                                         (void *)rlcg_access_gc_9_0,
4999                                         ARRAY_SIZE(rlcg_access_gc_9_0));
5000 }
5001
5002 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5003         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5004         .set_safe_mode = gfx_v9_0_set_safe_mode,
5005         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5006         .init = gfx_v9_0_rlc_init,
5007         .get_csb_size = gfx_v9_0_get_csb_size,
5008         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5009         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5010         .resume = gfx_v9_0_rlc_resume,
5011         .stop = gfx_v9_0_rlc_stop,
5012         .reset = gfx_v9_0_rlc_reset,
5013         .start = gfx_v9_0_rlc_start,
5014         .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5015         .rlcg_wreg = gfx_v9_0_rlcg_wreg,
5016         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5017 };
5018
5019 static int gfx_v9_0_set_powergating_state(void *handle,
5020                                           enum amd_powergating_state state)
5021 {
5022         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5023         bool enable = (state == AMD_PG_STATE_GATE);
5024
5025         switch (adev->asic_type) {
5026         case CHIP_RAVEN:
5027         case CHIP_RENOIR:
5028                 if (!enable)
5029                         amdgpu_gfx_off_ctrl(adev, false);
5030
5031                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5032                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5033                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5034                 } else {
5035                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5036                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5037                 }
5038
5039                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5040                         gfx_v9_0_enable_cp_power_gating(adev, true);
5041                 else
5042                         gfx_v9_0_enable_cp_power_gating(adev, false);
5043
5044                 /* update gfx cgpg state */
5045                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5046
5047                 /* update mgcg state */
5048                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5049
5050                 if (enable)
5051                         amdgpu_gfx_off_ctrl(adev, true);
5052                 break;
5053         case CHIP_VEGA12:
5054                 amdgpu_gfx_off_ctrl(adev, enable);
5055                 break;
5056         default:
5057                 break;
5058         }
5059
5060         return 0;
5061 }
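/*
 * Ordering in the RAVEN/RENOIR path above matters: GFXOFF is presumably
 * dropped first on ungate so that the GC registers touched by the PG setup
 * are actually accessible, and it is only re-enabled at the end of the
 * gate path once the powergating state has been fully programmed.
 */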
5062
5063 static int gfx_v9_0_set_clockgating_state(void *handle,
5064                                           enum amd_clockgating_state state)
5065 {
5066         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5067
5068         if (amdgpu_sriov_vf(adev))
5069                 return 0;
5070
5071         switch (adev->asic_type) {
5072         case CHIP_VEGA10:
5073         case CHIP_VEGA12:
5074         case CHIP_VEGA20:
5075         case CHIP_RAVEN:
5076         case CHIP_ARCTURUS:
5077         case CHIP_RENOIR:
5078                 gfx_v9_0_update_gfx_clock_gating(adev,
5079                                                  state == AMD_CG_STATE_GATE);
5080                 break;
5081         default:
5082                 break;
5083         }
5084         return 0;
5085 }
5086
5087 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5088 {
5089         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5090         int data;
5091
5092         if (amdgpu_sriov_vf(adev))
5093                 *flags = 0;
5094
5095         /* AMD_CG_SUPPORT_GFX_MGCG */
5096         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5097         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5098                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5099
5100         /* AMD_CG_SUPPORT_GFX_CGCG */
5101         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5102         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5103                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5104
5105         /* AMD_CG_SUPPORT_GFX_CGLS */
5106         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5107                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5108
5109         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5110         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5111         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5112                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5113
5114         /* AMD_CG_SUPPORT_GFX_CP_LS */
5115         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5116         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5117                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5118
5119         if (adev->asic_type != CHIP_ARCTURUS) {
5120                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5121                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5122                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5123                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5124
5125                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5126                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5127                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5128         }
5129 }
5130
5131 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5132 {
5133         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
5134 }
5135
5136 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5137 {
5138         struct amdgpu_device *adev = ring->adev;
5139         u64 wptr;
5140
5141         /* XXX check if swapping is necessary on BE */
5142         if (ring->use_doorbell) {
5143                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5144         } else {
5145                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5146                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5147         }
5148
5149         return wptr;
5150 }
5151
5152 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5153 {
5154         struct amdgpu_device *adev = ring->adev;
5155
5156         if (ring->use_doorbell) {
5157                 /* XXX check if swapping is necessary on BE */
5158                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5159                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5160         } else {
5161                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5162                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5163         }
5164 }
5165
5166 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5167 {
5168         struct amdgpu_device *adev = ring->adev;
5169         u32 ref_and_mask, reg_mem_engine;
5170         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5171
5172         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5173                 switch (ring->me) {
5174                 case 1:
5175                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5176                         break;
5177                 case 2:
5178                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5179                         break;
5180                 default:
5181                         return;
5182                 }
5183                 reg_mem_engine = 0;
5184         } else {
5185                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5186                 reg_mem_engine = 1; /* pfp */
5187         }
5188
5189         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5190                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5191                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5192                               ref_and_mask, ref_and_mask, 0x20);
5193 }
5194
5195 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5196                                         struct amdgpu_job *job,
5197                                         struct amdgpu_ib *ib,
5198                                         uint32_t flags)
5199 {
5200         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5201         u32 header, control = 0;
5202
5203         if (ib->flags & AMDGPU_IB_FLAG_CE)
5204                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5205         else
5206                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5207
5208         control |= ib->length_dw | (vmid << 24);
5209
5210         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5211                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5212
5213                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5214                         gfx_v9_0_ring_emit_de_meta(ring);
5215         }
5216
5217         amdgpu_ring_write(ring, header);
5218         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5219         amdgpu_ring_write(ring,
5220 #ifdef __BIG_ENDIAN
5221                 (2 << 0) |
5222 #endif
5223                 lower_32_bits(ib->gpu_addr));
5224         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5225         amdgpu_ring_write(ring, control);
5226 }
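/*
 * The INDIRECT_BUFFER control word built above packs the IB size in dwords
 * into the low bits and the VMID into bits [27:24] (assuming the usual
 * 16-VMID limit). E.g. length_dw = 16 with vmid = 3 yields
 * control = 16 | (3 << 24) = 0x03000010.
 */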
5227
5228 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5229                                           struct amdgpu_job *job,
5230                                           struct amdgpu_ib *ib,
5231                                           uint32_t flags)
5232 {
5233         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5234         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5235
5236         /* Currently, there is a high possibility to get wave ID mismatch
5237          * between ME and GDS, leading to a hw deadlock, because ME generates
5238          * different wave IDs than the GDS expects. This situation happens
5239          * randomly when at least 5 compute pipes use GDS ordered append.
5240          * The wave IDs generated by ME are also wrong after suspend/resume.
5241          * Those are probably bugs somewhere else in the kernel driver.
5242          *
5243          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5244          * GDS to 0 for this ring (me/pipe).
5245          */
5246         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5247                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5248                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5249                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5250         }
5251
5252         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5253         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5254         amdgpu_ring_write(ring,
5255 #ifdef __BIG_ENDIAN
5256                                 (2 << 0) |
5257 #endif
5258                                 lower_32_bits(ib->gpu_addr));
5259         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5260         amdgpu_ring_write(ring, control);
5261 }
5262
5263 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5264                                      u64 seq, unsigned flags)
5265 {
5266         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5267         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5268         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5269
5270         /* RELEASE_MEM - flush caches, send int */
5271         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5272         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5273                                                EOP_TC_NC_ACTION_EN) :
5274                                               (EOP_TCL1_ACTION_EN |
5275                                                EOP_TC_ACTION_EN |
5276                                                EOP_TC_WB_ACTION_EN |
5277                                                EOP_TC_MD_ACTION_EN)) |
5278                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5279                                  EVENT_INDEX(5)));
5280         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5281
5282         /*
5283          * The address must be Qword aligned for a 64bit write, and Dword
5284          * aligned if we only send the low 32bit data (data high is discarded).
5285          */
5286         if (write64bit)
5287                 BUG_ON(addr & 0x7);
5288         else
5289                 BUG_ON(addr & 0x3);
5290         amdgpu_ring_write(ring, lower_32_bits(addr));
5291         amdgpu_ring_write(ring, upper_32_bits(addr));
5292         amdgpu_ring_write(ring, lower_32_bits(seq));
5293         amdgpu_ring_write(ring, upper_32_bits(seq));
5294         amdgpu_ring_write(ring, 0);
5295 }
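/*
 * In the RELEASE_MEM packet above, DATA_SEL selects what lands at addr
 * (1 = low 32 bits of seq, 2 = the full 64-bit seq) and INT_SEL(2) asks
 * the CP to raise an interrupt once the write has completed; INT_SEL(0)
 * writes the fence silently.
 */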
5296
5297 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5298 {
5299         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5300         uint32_t seq = ring->fence_drv.sync_seq;
5301         uint64_t addr = ring->fence_drv.gpu_addr;
5302
5303         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5304                               lower_32_bits(addr), upper_32_bits(addr),
5305                               seq, 0xffffffff, 4);
5306 }
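/*
 * The pipeline sync above stalls the ring's front end (PFP on gfx rings,
 * ME on compute) with a WAIT_REG_MEM on the ring's own fence address until
 * the latest sync_seq value has been written back, i.e. until all prior
 * work on this ring has signalled.
 */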
5307
5308 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5309                                         unsigned vmid, uint64_t pd_addr)
5310 {
5311         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5312
5313         /* compute doesn't have PFP */
5314         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5315                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5316                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5317                 amdgpu_ring_write(ring, 0x0);
5318         }
5319 }
5320
5321 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5322 {
5323         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5324 }
5325
5326 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5327 {
5328         u64 wptr;
5329
5330         /* XXX check if swapping is necessary on BE */
5331         if (ring->use_doorbell)
5332                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5333         else
5334                 BUG();
5335         return wptr;
5336 }
5337
5338 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5339 {
5340         struct amdgpu_device *adev = ring->adev;
5341
5342         /* XXX check if swapping is necessary on BE */
5343         if (ring->use_doorbell) {
5344                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5345                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5346         } else {
5347                 BUG(); /* only DOORBELL method supported on gfx9 now */
5348         }
5349 }
5350
5351 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5352                                          u64 seq, unsigned int flags)
5353 {
5354         struct amdgpu_device *adev = ring->adev;
5355
5356         /* we only allocate 32bit for each seq wb address */
5357         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5358
5359         /* write fence seq to the "addr" */
5360         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5361         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5362                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5363         amdgpu_ring_write(ring, lower_32_bits(addr));
5364         amdgpu_ring_write(ring, upper_32_bits(addr));
5365         amdgpu_ring_write(ring, lower_32_bits(seq));
5366
5367         if (flags & AMDGPU_FENCE_FLAG_INT) {
5368                 /* set register to trigger INT */
5369                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5370                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5371                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5372                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5373                 amdgpu_ring_write(ring, 0);
5374                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5375         }
5376 }
5377
5378 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5379 {
5380         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5381         amdgpu_ring_write(ring, 0);
5382 }
5383
5384 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5385 {
5386         struct v9_ce_ib_state ce_payload = {0};
5387         uint64_t csa_addr;
5388         int cnt;
5389
5390         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5391         csa_addr = amdgpu_csa_vaddr(ring->adev);
5392
5393         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5394         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5395                                  WRITE_DATA_DST_SEL(8) |
5396                                  WR_CONFIRM) |
5397                                  WRITE_DATA_CACHE_POLICY(0));
5398         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5399         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5400         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5401 }
5402
5403 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5404 {
5405         struct v9_de_ib_state de_payload = {0};
5406         uint64_t csa_addr, gds_addr;
5407         int cnt;
5408
5409         csa_addr = amdgpu_csa_vaddr(ring->adev);
5410         gds_addr = csa_addr + 4096;
5411         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5412         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5413
5414         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5415         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5416         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5417                                  WRITE_DATA_DST_SEL(8) |
5418                                  WR_CONFIRM) |
5419                                  WRITE_DATA_CACHE_POLICY(0));
5420         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5421         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5422         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5423 }
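/*
 * About the cnt math in the two meta-data emitters above: the PM4 type-3
 * COUNT field is (number of dwords following the header) - 1, and
 * WRITE_DATA carries three fixed dwords (control, dst_addr_lo,
 * dst_addr_hi) before the payload, hence cnt = payload_dwords + 3 - 1,
 * written here as (sizeof(payload) >> 2) + 4 - 2.
 */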
5424
5425 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5426 {
5427         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5428         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5429 }
5430
5431 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5432 {
5433         uint32_t dw2 = 0;
5434
5435         if (amdgpu_sriov_vf(ring->adev))
5436                 gfx_v9_0_ring_emit_ce_meta(ring);
5437
5438         gfx_v9_0_ring_emit_tmz(ring, true);
5439
5440         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5441         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5442                 /* set load_global_config & load_global_uconfig */
5443                 dw2 |= 0x8001;
5444                 /* set load_cs_sh_regs */
5445                 dw2 |= 0x01000000;
5446                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5447                 dw2 |= 0x10002;
5448
5449                 /* set load_ce_ram if preamble presented */
5450                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5451                         dw2 |= 0x10000000;
5452         } else {
5453                 /* still load_ce_ram if this is the first time the preamble
5454                  * is presented, even though no context switch happens.
5455                  */
5456                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5457                         dw2 |= 0x10000000;
5458         }
5459
5460         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5461         amdgpu_ring_write(ring, dw2);
5462         amdgpu_ring_write(ring, 0);
5463 }
5464
5465 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5466 {
5467         unsigned ret;
5468         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5469         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5470         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5471         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr==0 */
5472         ret = ring->wptr & ring->buf_mask;
5473         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5474         return ret;
5475 }
5476
5477 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5478 {
5479         unsigned cur;
5480         BUG_ON(offset > ring->buf_mask);
5481         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5482
5483         cur = (ring->wptr & ring->buf_mask) - 1;
5484         if (likely(cur > offset))
5485                 ring->ring[offset] = cur - offset;
5486         else
5487                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5488 }
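/*
 * Worked example for the wrap-around branch above: with a 1024-dword ring
 * (buf_mask = 1023), offset = 1020 and cur = 4 patches the reserved dword
 * to (ring_size >> 2) - offset + cur = 1024 - 1020 + 4 = 8, i.e. the
 * number of dwords the CP discards when the COND_EXEC condition is false.
 */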
5489
5490 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5491 {
5492         struct amdgpu_device *adev = ring->adev;
5493         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5494
5495         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5496         amdgpu_ring_write(ring, 0 |     /* src: register*/
5497                                 (5 << 8) |      /* dst: memory */
5498                                 (1 << 20));     /* write confirm */
5499         amdgpu_ring_write(ring, reg);
5500         amdgpu_ring_write(ring, 0);
5501         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5502                                 kiq->reg_val_offs * 4));
5503         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5504                                 kiq->reg_val_offs * 4));
5505 }
5506
5507 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5508                                     uint32_t val)
5509 {
5510         uint32_t cmd = 0;
5511
5512         switch (ring->funcs->type) {
5513         case AMDGPU_RING_TYPE_GFX:
5514                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5515                 break;
5516         case AMDGPU_RING_TYPE_KIQ:
5517                 cmd = (1 << 16); /* no inc addr */
5518                 break;
5519         default:
5520                 cmd = WR_CONFIRM;
5521                 break;
5522         }
5523         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5524         amdgpu_ring_write(ring, cmd);
5525         amdgpu_ring_write(ring, reg);
5526         amdgpu_ring_write(ring, 0);
5527         amdgpu_ring_write(ring, val);
5528 }
5529
5530 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5531                                         uint32_t val, uint32_t mask)
5532 {
5533         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5534 }
5535
5536 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5537                                                   uint32_t reg0, uint32_t reg1,
5538                                                   uint32_t ref, uint32_t mask)
5539 {
5540         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5541         struct amdgpu_device *adev = ring->adev;
5542         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5543                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5544
5545         if (fw_version_ok)
5546                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5547                                       ref, mask, 0x20);
5548         else
5549                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5550                                                            ref, mask);
5551 }
5552
5553 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5554 {
5555         struct amdgpu_device *adev = ring->adev;
5556         uint32_t value = 0;
5557
5558         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5559         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5560         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5561         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5562         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5563 }
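/*
 * Soft recovery above pokes SQ_CMD with CHECK_VMID set so that only waves
 * belonging to the hung job's VMID are targeted; CMD = 0x03 appears to be
 * the "kill" opcode and MODE = 0x01 a broadcast mode. This gives a hung
 * ring a chance to recover without a full GPU reset.
 */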
5564
5565 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5566                                                  enum amdgpu_interrupt_state state)
5567 {
5568         switch (state) {
5569         case AMDGPU_IRQ_STATE_DISABLE:
5570         case AMDGPU_IRQ_STATE_ENABLE:
5571                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5572                                TIME_STAMP_INT_ENABLE,
5573                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5574                 break;
5575         default:
5576                 break;
5577         }
5578 }
5579
5580 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5581                                                      int me, int pipe,
5582                                                      enum amdgpu_interrupt_state state)
5583 {
5584         u32 mec_int_cntl, mec_int_cntl_reg;
5585
5586         /*
5587          * amdgpu controls only the first MEC. That's why this function only
5588          * handles the setting of interrupts for this specific MEC. All other
5589          * pipes' interrupts are set by amdkfd.
5590          */
5591
5592         if (me == 1) {
5593                 switch (pipe) {
5594                 case 0:
5595                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5596                         break;
5597                 case 1:
5598                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5599                         break;
5600                 case 2:
5601                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5602                         break;
5603                 case 3:
5604                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5605                         break;
5606                 default:
5607                         DRM_DEBUG("invalid pipe %d\n", pipe);
5608                         return;
5609                 }
5610         } else {
5611                 DRM_DEBUG("invalid me %d\n", me);
5612                 return;
5613         }
5614
5615         switch (state) {
5616         case AMDGPU_IRQ_STATE_DISABLE:
5617                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5618                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5619                                              TIME_STAMP_INT_ENABLE, 0);
5620                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5621                 break;
5622         case AMDGPU_IRQ_STATE_ENABLE:
5623                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5624                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5625                                              TIME_STAMP_INT_ENABLE, 1);
5626                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5627                 break;
5628         default:
5629                 break;
5630         }
5631 }
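/*
 * Note that the REG_SET_FIELD() calls above always use the
 * CP_ME1_PIPE0_INT_CNTL field layout even for pipes 1-3; that is fine as
 * long as all CP_ME1_PIPEx_INT_CNTL registers share one layout, which
 * appears to be the case on gfx9.
 */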
5632
5633 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5634                                              struct amdgpu_irq_src *source,
5635                                              unsigned type,
5636                                              enum amdgpu_interrupt_state state)
5637 {
5638         switch (state) {
5639         case AMDGPU_IRQ_STATE_DISABLE:
5640         case AMDGPU_IRQ_STATE_ENABLE:
5641                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5642                                PRIV_REG_INT_ENABLE,
5643                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5644                 break;
5645         default:
5646                 break;
5647         }
5648
5649         return 0;
5650 }
5651
5652 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5653                                               struct amdgpu_irq_src *source,
5654                                               unsigned type,
5655                                               enum amdgpu_interrupt_state state)
5656 {
5657         switch (state) {
5658         case AMDGPU_IRQ_STATE_DISABLE:
5659         case AMDGPU_IRQ_STATE_ENABLE:
5660                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5661                                PRIV_INSTR_INT_ENABLE,
5662                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
5663         default:
5664                 break;
5665         }
5666
5667         return 0;
5668 }
5669
5670 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5671         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5672                         CP_ECC_ERROR_INT_ENABLE, 1)
5673
5674 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5675         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5676                         CP_ECC_ERROR_INT_ENABLE, 0)
5677
5678 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5679                                               struct amdgpu_irq_src *source,
5680                                               unsigned type,
5681                                               enum amdgpu_interrupt_state state)
5682 {
5683         switch (state) {
5684         case AMDGPU_IRQ_STATE_DISABLE:
5685                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5686                                 CP_ECC_ERROR_INT_ENABLE, 0);
5687                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5688                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5689                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5690                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5691                 break;
5692
5693         case AMDGPU_IRQ_STATE_ENABLE:
5694                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5695                                 CP_ECC_ERROR_INT_ENABLE, 1);
5696                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5697                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5698                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5699                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5700                 break;
5701         default:
5702                 break;
5703         }
5704
5705         return 0;
5706 }
5707
5708
5709 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5710                                             struct amdgpu_irq_src *src,
5711                                             unsigned type,
5712                                             enum amdgpu_interrupt_state state)
5713 {
5714         switch (type) {
5715         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5716                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5717                 break;
5718         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5719                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5720                 break;
5721         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5722                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5723                 break;
5724         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5725                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5726                 break;
5727         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5728                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5729                 break;
5730         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5731                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5732                 break;
5733         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5734                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5735                 break;
5736         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5737                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5738                 break;
5739         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5740                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5741                 break;
5742         default:
5743                 break;
5744         }
5745         return 0;
5746 }
5747
5748 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5749                             struct amdgpu_irq_src *source,
5750                             struct amdgpu_iv_entry *entry)
5751 {
5752         int i;
5753         u8 me_id, pipe_id, queue_id;
5754         struct amdgpu_ring *ring;
5755
5756         DRM_DEBUG("IH: CP EOP\n");
5757         me_id = (entry->ring_id & 0x0c) >> 2;
5758         pipe_id = (entry->ring_id & 0x03) >> 0;
5759         queue_id = (entry->ring_id & 0x70) >> 4;
5760
5761         switch (me_id) {
5762         case 0:
5763                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5764                 break;
5765         case 1:
5766         case 2:
5767                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5768                         ring = &adev->gfx.compute_ring[i];
5769                         /* Per-queue interrupt is supported for MEC starting from VI.
5770                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5771                          */
5772                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5773                                 amdgpu_fence_process(ring);
5774                 }
5775                 break;
5776         }
5777         return 0;
5778 }
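/*
 * ring_id decoding used above and in gfx_v9_0_fault() below: me sits in
 * bits [3:2], pipe in bits [1:0] and queue in bits [6:4]. E.g. a ring_id
 * of 0x25 (0b0100101) decodes to me = 1, pipe = 1, queue = 2.
 */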
5779
5780 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5781                            struct amdgpu_iv_entry *entry)
5782 {
5783         u8 me_id, pipe_id, queue_id;
5784         struct amdgpu_ring *ring;
5785         int i;
5786
5787         me_id = (entry->ring_id & 0x0c) >> 2;
5788         pipe_id = (entry->ring_id & 0x03) >> 0;
5789         queue_id = (entry->ring_id & 0x70) >> 4;
5790
5791         switch (me_id) {
5792         case 0:
5793                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5794                 break;
5795         case 1:
5796         case 2:
5797                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5798                         ring = &adev->gfx.compute_ring[i];
5799                         if (ring->me == me_id && ring->pipe == pipe_id &&
5800                             ring->queue == queue_id)
5801                                 drm_sched_fault(&ring->sched);
5802                 }
5803                 break;
5804         }
5805 }
5806
5807 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5808                                  struct amdgpu_irq_src *source,
5809                                  struct amdgpu_iv_entry *entry)
5810 {
5811         DRM_ERROR("Illegal register access in command stream\n");
5812         gfx_v9_0_fault(adev, entry);
5813         return 0;
5814 }
5815
5816 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5817                                   struct amdgpu_irq_src *source,
5818                                   struct amdgpu_iv_entry *entry)
5819 {
5820         DRM_ERROR("Illegal instruction in command stream\n");
5821         gfx_v9_0_fault(adev, entry);
5822         return 0;
5823 }
5824
5825
5826 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5827         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5828           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5829           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5830         },
5831         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5832           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5833           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5834         },
5835         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5836           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5837           0, 0
5838         },
5839         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5840           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5841           0, 0
5842         },
5843         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5844           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5845           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5846         },
5847         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5848           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5849           0, 0
5850         },
5851         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5852           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5853           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5854         },
5855         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5856           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5857           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5858         },
5859         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5860           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5861           0, 0
5862         },
5863         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5864           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5865           0, 0
5866         },
5867         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5868           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5869           0, 0
5870         },
5871         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5872           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5873           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5874         },
5875         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5876           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5877           0, 0
5878         },
5879         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5880           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5881           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5882         },
5883         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5884           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5885           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5886           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5887         },
5888         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5889           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5890           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5891           0, 0
5892         },
5893         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5894           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5895           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5896           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5897         },
5898         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5899           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5900           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5901           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5902         },
5903         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5904           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5905           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5906           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5907         },
5908         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5909           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5910           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5911           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5912         },
5913         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5914           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5915           0, 0
5916         },
5917         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5918           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5919           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5920         },
5921         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5922           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5923           0, 0
5924         },
5925         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5926           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5927           0, 0
5928         },
5929         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5930           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5931           0, 0
5932         },
5933         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5934           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5935           0, 0
5936         },
5937         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5938           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5939           0, 0
5940         },
5941         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5942           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5943           0, 0
5944         },
5945         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5946           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5947           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5948         },
5949         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5950           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5951           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5952         },
5953         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5954           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5955           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5956         },
5957         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5958           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5959           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5960         },
5961         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5962           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5963           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5964         },
5965         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5966           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5967           0, 0
5968         },
5969         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5970           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5971           0, 0
5972         },
5973         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5974           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5975           0, 0
5976         },
5977         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5978           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5979           0, 0
5980         },
5981         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5982           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5983           0, 0
5984         },
5985         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5986           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5987           0, 0
5988         },
5989         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5990           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5991           0, 0
5992         },
5993         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5994           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5995           0, 0
5996         },
5997         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5998           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5999           0, 0
6000         },
6001         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6002           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6003           0, 0
6004         },
6005         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6006           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6007           0, 0
6008         },
6009         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6010           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6011           0, 0
6012         },
6013         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6014           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6015           0, 0
6016         },
6017         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6018           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6019           0, 0
6020         },
6021         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6022           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6023           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6024         },
6025         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6026           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6027           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6028         },
6029         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6030           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6031           0, 0
6032         },
6033         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6034           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6035           0, 0
6036         },
6037         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6038           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6039           0, 0
6040         },
6041         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6042           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6043           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6044         },
6045         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6046           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6047           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6048         },
6049         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6050           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6051           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6052         },
6053         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6054           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6055           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6056         },
6057         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6058           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6059           0, 0
6060         },
6061         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6062           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6063           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6064         },
6065         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6066           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6067           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6068         },
6069         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6070           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6071           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6072         },
6073         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6074           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6075           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6076         },
6077         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6078           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6079           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6080         },
6081         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6082           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6083           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6084         },
6085         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6086           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6087           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6088         },
6089         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6090           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6091           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6092         },
6093         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6094           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6095           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6096         },
6097         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6098           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6099           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6100         },
6101         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6102           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6103           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6104         },
6105         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6106           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6107           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6108         },
6109         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6110           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6111           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6112         },
6113         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6114           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6115           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6116         },
6117         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6118           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6119           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6120         },
6121         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6122           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6123           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6124         },
6125         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6126           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6127           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6128         },
6129         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6130           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6131           0, 0
6132         },
6133         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6134           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6135           0, 0
6136         },
6137         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6138           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6139           0, 0
6140         },
6141         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6142           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6143           0, 0
6144         },
6145         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6146           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6147           0, 0
6148         },
6149         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6150           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6151           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6152         },
6153         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6154           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6155           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6156         },
6157         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6158           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6159           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6160         },
6161         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6162           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6163           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6164         },
6165         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6166           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6167           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6168         },
6169         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6170           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6171           0, 0
6172         },
6173         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6174           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6175           0, 0
6176         },
6177         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6178           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6179           0, 0
6180         },
6181         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6182           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6183           0, 0
6184         },
6185         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6186           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6187           0, 0
6188         },
6189         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6190           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6191           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6192         },
6193         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6194           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6195           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6196         },
6197         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6198           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6199           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6200         },
6201         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6202           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6203           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6204         },
6205         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6206           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6207           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6208         },
6209         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6210           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6211           0, 0
6212         },
6213         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6214           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6215           0, 0
6216         },
6217         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6218           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6219           0, 0
6220         },
6221         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6222           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6223           0, 0
6224         },
6225         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6226           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6227           0, 0
6228         },
6229         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6230           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6231           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6232         },
6233         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6234           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6235           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6236         },
6237         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6238           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6239           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6240         },
6241         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6242           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6243           0, 0
6244         },
6245         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6246           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6247           0, 0
6248         },
6249         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6250           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6251           0, 0
6252         },
6253         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6254           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6255           0, 0
6256         },
6257         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6258           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6259           0, 0
6260         },
6261         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6262           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6263           0, 0
6264         }
6265 };
6266
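/*
 * Inject a RAS error into a GFX sub-block through the PSP RAS TA.
 * The requested sub-block must exist in ras_gfx_subblocks, and the
 * error type must be supported by both the hardware and the driver,
 * otherwise the injection is rejected before reaching the firmware.
 */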
6267 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6268                                      void *inject_if)
6269 {
6270         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6271         int ret;
6272         struct ta_ras_trigger_error_input block_info = { 0 };
6273
6274         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6275                 return -EINVAL;
6276
6277         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6278                 return -EINVAL;
6279
6280         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6281                 return -EPERM;
6282
6283         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6284               info->head.type)) {
6285                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6286                         ras_gfx_subblocks[info->head.sub_block_index].name,
6287                         info->head.type);
6288                 return -EPERM;
6289         }
6290
6291         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6292               info->head.type)) {
6293                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6294                         ras_gfx_subblocks[info->head.sub_block_index].name,
6295                         info->head.type);
6296                 return -EPERM;
6297         }
6298
6299         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6300         block_info.sub_block_index =
6301                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6302         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6303         block_info.address = info->address;
6304         block_info.value = info->value;
6305
6306         mutex_lock(&adev->grbm_idx_mutex);
6307         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6308         mutex_unlock(&adev->grbm_idx_mutex);
6309
6310         return ret;
6311 }
6312
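/*
 * Name tables for the indexed UTC memory instances (VML2 bank caches,
 * VML2 walker memories, ATC L2 caches). Array order matches the
 * hardware instance index written to the corresponding *_ECC_INDEX /
 * *_EDC_INDEX register before each counter is read.
 */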
6313 static const char *vml2_mems[] = {
6314         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6315         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6316         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6317         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6318         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6319         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6320         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6321         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6322         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6323         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6324         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6325         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6326         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6327         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6328         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6329         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6330 };
6331
6332 static const char *vml2_walker_mems[] = {
6333         "UTC_VML2_CACHE_PDE0_MEM0",
6334         "UTC_VML2_CACHE_PDE0_MEM1",
6335         "UTC_VML2_CACHE_PDE1_MEM0",
6336         "UTC_VML2_CACHE_PDE1_MEM1",
6337         "UTC_VML2_CACHE_PDE2_MEM0",
6338         "UTC_VML2_CACHE_PDE2_MEM1",
6339         "UTC_VML2_RDIF_LOG_FIFO",
6340 };
6341
6342 static const char *atc_l2_cache_2m_mems[] = {
6343         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6344         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6345         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6346         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6347 };
6348
6349 static const char *atc_l2_cache_4k_mems[] = {
6350         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6351         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6352         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6353         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6354         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6355         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6356         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6357         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6358         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6359         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6360         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6361         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6362         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6363         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6364         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6365         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6366         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6367         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6368         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6369         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6370         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6371         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6372         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6373         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6374         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6375         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6376         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6377         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6378         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6379         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6380         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6381         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6382 };
6383
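/*
 * Walk the VML2, VML2 walker and ATC L2 EDC counter instances,
 * accumulating single-bit (SEC) errors into ce_count and double-bit
 * (DED) errors into ue_count. Each instance is selected through its
 * INDEX register; the counters are cleared up front and the INDEX
 * registers are parked at 255 again on exit.
 */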
6384 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6385                                          struct ras_err_data *err_data)
6386 {
6387         uint32_t i, data;
6388         uint32_t sec_count, ded_count;
6389
6390         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6391         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6392         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6393         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6394         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6395         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6396         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6397         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6398
6399         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6400                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6401                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6402
6403                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6404                 if (sec_count) {
6405                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6406                                  vml2_mems[i], sec_count);
6407                         err_data->ce_count += sec_count;
6408                 }
6409
6410                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6411                 if (ded_count) {
6412                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6413                                  vml2_mems[i], ded_count);
6414                         err_data->ue_count += ded_count;
6415                 }
6416         }
6417
6418         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6419                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6420                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6421
6422                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6423                                                 SEC_COUNT);
6424                 if (sec_count) {
6425                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6426                                  vml2_walker_mems[i], sec_count);
6427                         err_data->ce_count += sec_count;
6428                 }
6429
6430                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6431                                                 DED_COUNT);
6432                 if (ded_count) {
6433                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6434                                  vml2_walker_mems[i], ded_count);
6435                         err_data->ue_count += ded_count;
6436                 }
6437         }
6438
6439         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6440                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6441                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6442
6443                 sec_count = (data & 0x00006000L) >> 0xd; /* SEC_COUNT: bits 14:13 */
6444                 if (sec_count) {
6445                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6446                                  atc_l2_cache_2m_mems[i], sec_count);
6447                         err_data->ce_count += sec_count;
6448                 }
6449         }
6450
6451         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6452                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6453                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6454
6455                 sec_count = (data & 0x00006000L) >> 0xd; /* SEC_COUNT: bits 14:13 */
6456                 if (sec_count) {
6457                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6458                                  atc_l2_cache_4k_mems[i], sec_count);
6459                         err_data->ce_count += sec_count;
6460                 }
6461
6462                 ded_count = (data & 0x00018000L) >> 0xf; /* DED_COUNT: bits 16:15 */
6463                 if (ded_count) {
6464                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6465                                  atc_l2_cache_4k_mems[i], ded_count);
6466                         err_data->ue_count += ded_count;
6467                 }
6468         }
6469
6470         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6471         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6472         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6473         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6474
6475         return 0;
6476 }
6477
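/*
 * Decode one EDC counter register value: look up every
 * gfx_v9_0_ras_fields entry that belongs to this register, extract
 * its SEC and DED bitfields and add any non-zero counts to the
 * running totals for the given SE/instance.
 */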
6478 static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
6479         uint32_t se_id, uint32_t inst_id, uint32_t value,
6480         uint32_t *sec_count, uint32_t *ded_count)
6481 {
6482         uint32_t i;
6483         uint32_t sec_cnt, ded_cnt;
6484
6485         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6486                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6487                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
6488                         gfx_v9_0_ras_fields[i].inst != reg->inst)
6489                         continue;
6490
6491                 sec_cnt = (value &
6492                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6493                                 gfx_v9_0_ras_fields[i].sec_count_shift;
6494                 if (sec_cnt) {
6495                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6496                                 gfx_v9_0_ras_fields[i].name,
6497                                 se_id, inst_id,
6498                                 sec_cnt);
6499                         *sec_count += sec_cnt;
6500                 }
6501
6502                 ded_cnt = (value &
6503                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6504                                 gfx_v9_0_ras_fields[i].ded_count_shift;
6505                 if (ded_cnt) {
6506                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6507                                 gfx_v9_0_ras_fields[i].name,
6508                                 se_id, inst_id,
6509                                 ded_cnt);
6510                         *ded_count += ded_cnt;
6511                 }
6512         }
6513
6514         return 0;
6515 }
6516
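/*
 * The EDC counters are cleared on read, so resetting them amounts to
 * reading every counter register across all SE/instance selections
 * and every indexed UTC memory instance.
 */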
6517 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6518 {
6519         int i, j, k;
6520
6521         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6522                 return;
6523
6524         /* read back registers to clear the counters */
6525         mutex_lock(&adev->grbm_idx_mutex);
6526         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6527                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6528                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6529                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6530                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6531                         }
6532                 }
6533         }
6534         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); /* restore SE/SH/INSTANCE broadcast */
6535         mutex_unlock(&adev->grbm_idx_mutex);
6536
6537         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6538         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6539         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6540         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6541         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6542         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6543         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6544         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6545
6546         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6547                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6548                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6549         }
6550
6551         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6552                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6553                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6554         }
6555
6556         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6557                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6558                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6559         }
6560
6561         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6562                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6563                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6564         }
6565
6566         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6567         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6568         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6569         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6570 }
6571
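/*
 * Query the GFX RAS error totals: read every EDC counter register for
 * each shader engine and instance, decode the counts with
 * gfx_v9_0_ras_error_count(), then fold in the UTC EDC status.
 * ce_count accumulates correctable (SEC/SED) errors, ue_count
 * uncorrectable (DED) errors.
 */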
6572 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6573                                           void *ras_error_status)
6574 {
6575         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6576         uint32_t sec_count = 0, ded_count = 0;
6577         uint32_t i, j, k;
6578         uint32_t reg_value;
6579
6580         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6581                 return -EINVAL;
6582
6583         err_data->ue_count = 0;
6584         err_data->ce_count = 0;
6585
6586         mutex_lock(&adev->grbm_idx_mutex);
6587
6588         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6589                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6590                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6591                                 gfx_v9_0_select_se_sh(adev, j, 0, k);
6592                                 reg_value =
6593                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6594                                 if (reg_value)
6595                                         gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i],
6596                                                         j, k, reg_value,
6597                                                         &sec_count, &ded_count);
6598                         }
6599                 }
6600         }
6601
6602         err_data->ce_count += sec_count;
6603         err_data->ue_count += ded_count;
6604
6605         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6606         mutex_unlock(&adev->grbm_idx_mutex);
6607
6608         gfx_v9_0_query_utc_edc_status(adev, err_data);
6609
6610         return 0;
6611 }
6612
6613 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6614         .name = "gfx_v9_0",
6615         .early_init = gfx_v9_0_early_init,
6616         .late_init = gfx_v9_0_late_init,
6617         .sw_init = gfx_v9_0_sw_init,
6618         .sw_fini = gfx_v9_0_sw_fini,
6619         .hw_init = gfx_v9_0_hw_init,
6620         .hw_fini = gfx_v9_0_hw_fini,
6621         .suspend = gfx_v9_0_suspend,
6622         .resume = gfx_v9_0_resume,
6623         .is_idle = gfx_v9_0_is_idle,
6624         .wait_for_idle = gfx_v9_0_wait_for_idle,
6625         .soft_reset = gfx_v9_0_soft_reset,
6626         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6627         .set_powergating_state = gfx_v9_0_set_powergating_state,
6628         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6629 };
6630
6631 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6632         .type = AMDGPU_RING_TYPE_GFX,
6633         .align_mask = 0xff,
6634         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6635         .support_64bit_ptrs = true,
6636         .vmhub = AMDGPU_GFXHUB_0,
6637         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6638         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6639         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6640         .emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
6641                 5 +  /* COND_EXEC */
6642                 7 +  /* PIPELINE_SYNC */
6643                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6644                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6645                 2 + /* VM_FLUSH */
6646                 8 +  /* FENCE for VM_FLUSH */
6647                 20 + /* GDS switch */
6648                 4 + /* double SWITCH_BUFFER,
6649                        the first COND_EXEC jumps to the place just
6650                        prior to this double SWITCH_BUFFER */
6651                 5 + /* COND_EXEC */
6652                 7 + /* HDP_flush */
6653                 4 + /* VGT_flush */
6654                 14 + /* CE_META */
6655                 31 + /* DE_META */
6656                 3 + /* CNTX_CTRL */
6657                 5 + /* HDP_INVL */
6658                 8 + 8 + /* FENCE x2 */
6659                 2, /* SWITCH_BUFFER */
6660         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6661         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6662         .emit_fence = gfx_v9_0_ring_emit_fence,
6663         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6664         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6665         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6666         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6667         .test_ring = gfx_v9_0_ring_test_ring,
6668         .test_ib = gfx_v9_0_ring_test_ib,
6669         .insert_nop = amdgpu_ring_insert_nop,
6670         .pad_ib = amdgpu_ring_generic_pad_ib,
6671         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6672         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6673         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6674         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6675         .emit_tmz = gfx_v9_0_ring_emit_tmz,
6676         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6677         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6678         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6679         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6680 };
6681
6682 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6683         .type = AMDGPU_RING_TYPE_COMPUTE,
6684         .align_mask = 0xff,
6685         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6686         .support_64bit_ptrs = true,
6687         .vmhub = AMDGPU_GFXHUB_0,
6688         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6689         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6690         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6691         .emit_frame_size =
6692                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6693                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6694                 5 + /* hdp invalidate */
6695                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6696                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6697                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6698                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6699                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6700         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6701         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6702         .emit_fence = gfx_v9_0_ring_emit_fence,
6703         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6704         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6705         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6706         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6707         .test_ring = gfx_v9_0_ring_test_ring,
6708         .test_ib = gfx_v9_0_ring_test_ib,
6709         .insert_nop = amdgpu_ring_insert_nop,
6710         .pad_ib = amdgpu_ring_generic_pad_ib,
6711         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6712         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6713         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6714 };
6715
6716 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6717         .type = AMDGPU_RING_TYPE_KIQ,
6718         .align_mask = 0xff,
6719         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6720         .support_64bit_ptrs = true,
6721         .vmhub = AMDGPU_GFXHUB_0,
6722         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6723         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6724         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6725         .emit_frame_size =
6726                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6727                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6728                 5 + /* hdp invalidate */
6729                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6730                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6731                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6732                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6733                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6734         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6735         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6736         .test_ring = gfx_v9_0_ring_test_ring,
6737         .insert_nop = amdgpu_ring_insert_nop,
6738         .pad_ib = amdgpu_ring_generic_pad_ib,
6739         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6740         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6741         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6742         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6743 };
6744
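/* Attach the KIQ, GFX and compute ring function tables defined above. */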
6745 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6746 {
6747         int i;
6748
6749         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6750
6751         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6752                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6753
6754         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6755                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6756 }
6757
6758 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6759         .set = gfx_v9_0_set_eop_interrupt_state,
6760         .process = gfx_v9_0_eop_irq,
6761 };
6762
6763 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6764         .set = gfx_v9_0_set_priv_reg_fault_state,
6765         .process = gfx_v9_0_priv_reg_irq,
6766 };
6767
6768 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6769         .set = gfx_v9_0_set_priv_inst_fault_state,
6770         .process = gfx_v9_0_priv_inst_irq,
6771 };
6772
6773 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6774         .set = gfx_v9_0_set_cp_ecc_error_state,
6775         .process = amdgpu_gfx_cp_ecc_error_irq,
6776 };
6777
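/* Route the EOP, privileged reg/inst fault and CP ECC error interrupts. */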
6779 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6780 {
6781         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6782         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6783
6784         adev->gfx.priv_reg_irq.num_types = 1;
6785         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6786
6787         adev->gfx.priv_inst_irq.num_types = 1;
6788         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6789
6790         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6791         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6792 }
6793
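/* All supported gfx v9 ASICs share the common RLC function table. */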
6794 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6795 {
6796         switch (adev->asic_type) {
6797         case CHIP_VEGA10:
6798         case CHIP_VEGA12:
6799         case CHIP_VEGA20:
6800         case CHIP_RAVEN:
6801         case CHIP_ARCTURUS:
6802         case CHIP_RENOIR:
6803                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6804                 break;
6805         default:
6806                 break;
6807         }
6808 }
6809
6810 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6811 {
6812         /* init asic gds info */
6813         switch (adev->asic_type) {
6814         case CHIP_VEGA10:
6815         case CHIP_VEGA12:
6816         case CHIP_VEGA20:
6817                 adev->gds.gds_size = 0x10000;
6818                 break;
6819         case CHIP_RAVEN:
6820         case CHIP_ARCTURUS:
6821                 adev->gds.gds_size = 0x1000;
6822                 break;
6823         default:
6824                 adev->gds.gds_size = 0x10000;
6825                 break;
6826         }
6827
6828         switch (adev->asic_type) {
6829         case CHIP_VEGA10:
6830         case CHIP_VEGA20:
6831                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6832                 break;
6833         case CHIP_VEGA12:
6834                 adev->gds.gds_compute_max_wave_id = 0x27f;
6835                 break;
6836         case CHIP_RAVEN:
6837                 if (adev->rev_id >= 0x8)
6838                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6839                 else
6840                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6841                 break;
6842         case CHIP_ARCTURUS:
6843                 adev->gds.gds_compute_max_wave_id = 0xfff;
6844                 break;
6845         default:
6846                 /* this really depends on the chip */
6847                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6848                 break;
6849         }
6850
6851         adev->gds.gws_size = 64;
6852         adev->gds.oa_size = 16;
6853 }
6854
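/*
 * Program the user-controlled inactive-CU mask for the currently
 * selected SE/SH. A zero bitmap leaves the register untouched.
 */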
6855 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6856                                                  u32 bitmap)
6857 {
6858         u32 data;
6859
6860         if (!bitmap)
6861                 return;
6862
6863         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6864         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6865
6866         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6867 }
6868
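/*
 * Derive the active-CU bitmap for the currently selected SE/SH by
 * OR-ing the fused and user-set inactive-CU masks and inverting the
 * result, limited to max_cu_per_sh bits.
 */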
6869 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6870 {
6871         u32 data, mask;
6872
6873         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6874         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6875
6876         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6877         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6878
6879         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6880
6881         return (~data) & mask;
6882 }
6883
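/*
 * Fill in cu_info from the per-SE/SH active-CU bitmaps: count the
 * active CUs, build the always-on CU mask and record the bitmaps,
 * remapping Arcturus' 8*1 SE/SH layout into the 4x4 bitmap array
 * (see the comment in the loop body).
 */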
6884 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6885                                  struct amdgpu_cu_info *cu_info)
6886 {
6887         int i, j, k, counter, active_cu_number = 0;
6888         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6889         unsigned disable_masks[4 * 4];
6890
6891         if (!adev || !cu_info)
6892                 return -EINVAL;
6893
6894         /*
6895          * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6896          */
6897         if (adev->gfx.config.max_shader_engines *
6898                 adev->gfx.config.max_sh_per_se > 16)
6899                 return -EINVAL;
6900
6901         amdgpu_gfx_parse_disable_cu(disable_masks,
6902                                     adev->gfx.config.max_shader_engines,
6903                                     adev->gfx.config.max_sh_per_se);
6904
6905         mutex_lock(&adev->grbm_idx_mutex);
6906         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6907                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6908                         mask = 1;
6909                         ao_bitmap = 0;
6910                         counter = 0;
6911                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6912                         gfx_v9_0_set_user_cu_inactive_bitmap(
6913                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6914                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6915
6916                         /*
6917                          * The bitmap (and ao_cu_bitmap) in the cu_info structure
6918                          * is a 4x4 array, which suits Vega ASICs with their 4*2
6919                          * SE/SH layout.
6920                          * Arcturus, however, uses an 8*1 SE/SH layout.
6921                          * To minimize the impact, we fold it into the existing
6922                          * bitmap array as follows:
6923                          *    SE4,SH0 --> bitmap[0][1]
6924                          *    SE5,SH0 --> bitmap[1][1]
6925                          *    SE6,SH0 --> bitmap[2][1]
6926                          *    SE7,SH0 --> bitmap[3][1]
6927                          */
6928                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6929
6930                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6931                                 if (bitmap & mask) {
6932                                         if (counter < adev->gfx.config.max_cu_per_sh)
6933                                                 ao_bitmap |= mask;
6934                                         counter++;
6935                                 }
6936                                 mask <<= 1;
6937                         }
6938                         active_cu_number += counter;
6939                         if (i < 2 && j < 2)
6940                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6941                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6942                 }
6943         }
6944         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6945         mutex_unlock(&adev->grbm_idx_mutex);
6946
6947         cu_info->number = active_cu_number;
6948         cu_info->ao_cu_mask = ao_cu_mask;
6949         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6950
6951         return 0;
6952 }
6953
6954 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6955 {
6956         .type = AMD_IP_BLOCK_TYPE_GFX,
6957         .major = 9,
6958         .minor = 0,
6959         .rev = 0,
6960         .funcs = &gfx_v9_0_ip_funcs,
6961 };