drm/amdgpu: fix memory overflow in the IB test
[platform/kernel/linux-rpi.git] / drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "amdgpu_atomfirmware.h"
33 #include "imu_v11_0.h"
34 #include "soc21.h"
35 #include "nvd.h"
36
37 #include "gc/gc_11_0_0_offset.h"
38 #include "gc/gc_11_0_0_sh_mask.h"
39 #include "smuio/smuio_13_0_6_offset.h"
40 #include "smuio/smuio_13_0_6_sh_mask.h"
41 #include "navi10_enum.h"
42 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
43
44 #include "soc15.h"
45 #include "soc15d.h"
46 #include "clearstate_gfx11.h"
47 #include "v11_structs.h"
48 #include "gfx_v11_0.h"
49 #include "gfx_v11_0_3.h"
50 #include "nbio_v4_3.h"
51 #include "mes_v11_0.h"
52
53 #define GFX11_NUM_GFX_RINGS             1
54 #define GFX11_MEC_HPD_SIZE      2048
55
56 #define RLCG_UCODE_LOADING_START_ADDRESS        0x00002000L
57 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1        0x1388
58
59 #define regCGTT_WD_CLK_CTRL             0x5086
60 #define regCGTT_WD_CLK_CTRL_BASE_IDX    1
61 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1   0x4e7e
62 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX  1
63
64 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
65 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
66 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
67 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
68 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
69 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
70 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
71 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
72 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
73 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
74 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
75 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
76 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
77 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
79 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
80 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
81 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
83 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
84 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
85
86 static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
87         SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
88 };
89
90 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
91 {
92         SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
93         SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
94         SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
95         SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
96         SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
97         SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
98         SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
99         SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
100         SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
101 };
102
103 #define DEFAULT_SH_MEM_CONFIG \
104         ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
105          (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
106          (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
107
108 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
109 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
110 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
111 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
112 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
113 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
114 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
115 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
116                                  struct amdgpu_cu_info *cu_info);
117 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
118 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
119                                    u32 sh_num, u32 instance, int xcc_id);
120 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
121
122 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
123 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
124 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
125                                      uint32_t val);
126 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
127 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
128                                            uint16_t pasid, uint32_t flush_type,
129                                            bool all_hub, uint8_t dst_sel);
130 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
131 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
132 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
133                                       bool enable);
134
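/*
 * Tell the CP scheduler which compute queues the driver owns: SET_RESOURCES
 * on the KIQ carries the 64-bit queue mask; the GWS, OAC and GDS fields are
 * left at zero.
 */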
135 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
136 {
137         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
138         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
139                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
140         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
141         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
142         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
143         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
144         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
145         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
146 }
147
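/*
 * Hand a ring off to the CP via MAP_QUEUES on the KIQ: the packet carries the
 * queue/pipe/me selection, doorbell offset, MQD address and wptr address.
 * eng_sel picks the microengine (0 = compute, 4 = gfx, 5 = MES).
 */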
148 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
149                                  struct amdgpu_ring *ring)
150 {
151         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
152         uint64_t wptr_addr = ring->wptr_gpu_addr;
153         uint32_t me = 0, eng_sel = 0;
154
155         switch (ring->funcs->type) {
156         case AMDGPU_RING_TYPE_COMPUTE:
157                 me = 1;
158                 eng_sel = 0;
159                 break;
160         case AMDGPU_RING_TYPE_GFX:
161                 me = 0;
162                 eng_sel = 4;
163                 break;
164         case AMDGPU_RING_TYPE_MES:
165                 me = 2;
166                 eng_sel = 5;
167                 break;
168         default:
169                 WARN_ON(1);
170         }
171
172         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
173         /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
174         amdgpu_ring_write(kiq_ring,
175                           PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
176                           PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
177                           PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
178                           PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
179                           PACKET3_MAP_QUEUES_ME((me)) |
180                           PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
181                           PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
182                           PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
183                           PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
184         amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
185         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
186         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
187         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
188         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
189 }
190
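/*
 * Remove a queue from the CP via UNMAP_QUEUES, or through the MES when MES is
 * enabled and the KIQ ring isn't schedulable yet.  For PREEMPT_QUEUES_NO_UNMAP
 * the trailing dwords pass the gpu_addr/seq used to report completion.
 */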
191 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
192                                    struct amdgpu_ring *ring,
193                                    enum amdgpu_unmap_queues_action action,
194                                    u64 gpu_addr, u64 seq)
195 {
196         struct amdgpu_device *adev = kiq_ring->adev;
197         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
198
199         if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
200                 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
201                 return;
202         }
203
204         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
205         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
206                           PACKET3_UNMAP_QUEUES_ACTION(action) |
207                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
208                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
209                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
210         amdgpu_ring_write(kiq_ring,
211                   PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
212
213         if (action == PREEMPT_QUEUES_NO_UNMAP) {
214                 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
215                 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
216                 amdgpu_ring_write(kiq_ring, seq);
217         } else {
218                 amdgpu_ring_write(kiq_ring, 0);
219                 amdgpu_ring_write(kiq_ring, 0);
220                 amdgpu_ring_write(kiq_ring, 0);
221         }
222 }
223
224 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
225                                    struct amdgpu_ring *ring,
226                                    u64 addr,
227                                    u64 seq)
228 {
229         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
230
231         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
232         amdgpu_ring_write(kiq_ring,
233                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
234                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
235                           PACKET3_QUERY_STATUS_COMMAND(2));
236         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
237                           PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
238                           PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
239         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
240         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
241         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
242         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
243 }
244
245 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
246                                 uint16_t pasid, uint32_t flush_type,
247                                 bool all_hub)
248 {
249         gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
250 }
251
252 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
253         .kiq_set_resources = gfx11_kiq_set_resources,
254         .kiq_map_queues = gfx11_kiq_map_queues,
255         .kiq_unmap_queues = gfx11_kiq_unmap_queues,
256         .kiq_query_status = gfx11_kiq_query_status,
257         .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
258         .set_resources_size = 8,
259         .map_queues_size = 7,
260         .unmap_queues_size = 6,
261         .query_status_size = 7,
262         .invalidate_tlbs_size = 2,
263 };
264
265 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
266 {
267         adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
268 }
269
270 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
271 {
272         switch (adev->ip_versions[GC_HWIP][0]) {
273         case IP_VERSION(11, 0, 1):
274         case IP_VERSION(11, 0, 4):
275                 soc15_program_register_sequence(adev,
276                                                 golden_settings_gc_11_0_1,
277                                                 (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
278                 break;
279         default:
280                 break;
281         }
282         soc15_program_register_sequence(adev,
283                                         golden_settings_gc_11_0,
284                                         (const u32)ARRAY_SIZE(golden_settings_gc_11_0));
285
286 }
287
288 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
289                                        bool wc, uint32_t reg, uint32_t val)
290 {
291         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
292         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
293                           WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
294         amdgpu_ring_write(ring, reg);
295         amdgpu_ring_write(ring, 0);
296         amdgpu_ring_write(ring, val);
297 }
298
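/*
 * Emit a WAIT_REG_MEM packet: poll a register (mem_space = 0) or a memory
 * location (mem_space = 1) until (value & mask) == ref, re-checking at the
 * 'inv' poll interval.
 */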
299 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
300                                   int mem_space, int opt, uint32_t addr0,
301                                   uint32_t addr1, uint32_t ref, uint32_t mask,
302                                   uint32_t inv)
303 {
304         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
305         amdgpu_ring_write(ring,
306                           /* memory (1) or register (0) */
307                           (WAIT_REG_MEM_MEM_SPACE(mem_space) |
308                            WAIT_REG_MEM_OPERATION(opt) | /* wait */
309                            WAIT_REG_MEM_FUNCTION(3) |  /* equal */
310                            WAIT_REG_MEM_ENGINE(eng_sel)));
311
312         if (mem_space)
313                 BUG_ON(addr0 & 0x3); /* Dword align */
314         amdgpu_ring_write(ring, addr0);
315         amdgpu_ring_write(ring, addr1);
316         amdgpu_ring_write(ring, ref);
317         amdgpu_ring_write(ring, mask);
318         amdgpu_ring_write(ring, inv); /* poll interval */
319 }
320
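/*
 * Basic ring test: write 0xDEADBEEF to SCRATCH_REG0 through the ring (via
 * gfx_v11_0_ring_emit_wreg() on the KIQ, SET_UCONFIG_REG otherwise) and poll
 * the register until the token appears or usec_timeout expires.
 */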
321 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
322 {
323         struct amdgpu_device *adev = ring->adev;
324         uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
325         uint32_t tmp = 0;
326         unsigned i;
327         int r;
328
329         WREG32(scratch, 0xCAFEDEAD);
330         r = amdgpu_ring_alloc(ring, 5);
331         if (r) {
332                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
333                           ring->idx, r);
334                 return r;
335         }
336
337         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
338                 gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
339         } else {
340                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
341                 amdgpu_ring_write(ring, scratch -
342                                   PACKET3_SET_UCONFIG_REG_START);
343                 amdgpu_ring_write(ring, 0xDEADBEEF);
344         }
345         amdgpu_ring_commit(ring);
346
347         for (i = 0; i < adev->usec_timeout; i++) {
348                 tmp = RREG32(scratch);
349                 if (tmp == 0xDEADBEEF)
350                         break;
351                 if (amdgpu_emu_mode == 1)
352                         msleep(1);
353                 else
354                         udelay(1);
355         }
356
357         if (i >= adev->usec_timeout)
358                 r = -ETIMEDOUT;
359         return r;
360 }
361
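/*
 * IB test: submit a 5-dword WRITE_DATA IB that stores 0xDEADBEEF to a
 * writeback slot (or to the MES context padding area for MES queues), then
 * wait on the fence and check the token.  The IB is allocated with 20 bytes
 * so that all 5 dwords fit.
 */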
362 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
363 {
364         struct amdgpu_device *adev = ring->adev;
365         struct amdgpu_ib ib;
366         struct dma_fence *f = NULL;
367         unsigned index;
368         uint64_t gpu_addr;
369         volatile uint32_t *cpu_ptr;
370         long r;
371
372         /* MES KIQ firmware doesn't support indirect buffers yet */
373         if (adev->enable_mes_kiq &&
374             ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
375                 return 0;
376
377         memset(&ib, 0, sizeof(ib));
378
379         if (ring->is_mes_queue) {
380                 uint32_t padding, offset;
381
382                 offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
383                 padding = amdgpu_mes_ctx_get_offs(ring,
384                                                   AMDGPU_MES_CTX_PADDING_OFFS);
385
386                 ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
387                 ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
388
389                 gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
390                 cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
391                 *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
392         } else {
393                 r = amdgpu_device_wb_get(adev, &index);
394                 if (r)
395                         return r;
396
397                 gpu_addr = adev->wb.gpu_addr + (index * 4);
398                 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
399                 cpu_ptr = &adev->wb.wb[index];
400
401                 r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
402                 if (r) {
403                         DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
404                         goto err1;
405                 }
406         }
407
408         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
409         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
410         ib.ptr[2] = lower_32_bits(gpu_addr);
411         ib.ptr[3] = upper_32_bits(gpu_addr);
412         ib.ptr[4] = 0xDEADBEEF;
413         ib.length_dw = 5;
414
415         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
416         if (r)
417                 goto err2;
418
419         r = dma_fence_wait_timeout(f, false, timeout);
420         if (r == 0) {
421                 r = -ETIMEDOUT;
422                 goto err2;
423         } else if (r < 0) {
424                 goto err2;
425         }
426
427         if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
428                 r = 0;
429         else
430                 r = -EINVAL;
431 err2:
432         if (!ring->is_mes_queue)
433                 amdgpu_ib_free(adev, &ib, NULL);
434         dma_fence_put(f);
435 err1:
436         if (!ring->is_mes_queue)
437                 amdgpu_device_wb_free(adev, index);
438         return r;
439 }
440
441 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
442 {
443         amdgpu_ucode_release(&adev->gfx.pfp_fw);
444         amdgpu_ucode_release(&adev->gfx.me_fw);
445         amdgpu_ucode_release(&adev->gfx.rlc_fw);
446         amdgpu_ucode_release(&adev->gfx.mec_fw);
447
448         kfree(adev->gfx.rlc.register_list_format);
449 }
450
451 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
452 {
453         const struct psp_firmware_header_v1_0 *toc_hdr;
454         int err = 0;
455         char fw_name[40];
456
457         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
458         err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, fw_name);
459         if (err)
460                 goto out;
461
462         toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
463         adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
464         adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
465         adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
466         adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
467                                 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
468         return 0;
469 out:
470         amdgpu_ucode_release(&adev->psp.toc_fw);
471         return err;
472 }
473
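/*
 * Advertise CP gfx shadow support only when the ME/PFP/MEC firmware on
 * gfx 11.0.0/2/3 is new enough, and only for SR-IOV VFs; everything else
 * leaves it disabled.
 */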
474 static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
475 {
476         switch (adev->ip_versions[GC_HWIP][0]) {
477         case IP_VERSION(11, 0, 0):
478         case IP_VERSION(11, 0, 2):
479         case IP_VERSION(11, 0, 3):
480                 if ((adev->gfx.me_fw_version >= 1505) &&
481                     (adev->gfx.pfp_fw_version >= 1600) &&
482                     (adev->gfx.mec_fw_version >= 512)) {
483                         if (amdgpu_sriov_vf(adev))
484                                 adev->gfx.cp_gfx_shadow = true;
485                         else
486                                 adev->gfx.cp_gfx_shadow = false;
487                 }
488                 break;
489         default:
490                 adev->gfx.cp_gfx_shadow = false;
491                 break;
492         }
493 }
494
495 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
496 {
497         char fw_name[40];
498         char ucode_prefix[30];
499         int err;
500         const struct rlc_firmware_header_v2_0 *rlc_hdr;
501         uint16_t version_major;
502         uint16_t version_minor;
503
504         DRM_DEBUG("\n");
505
506         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
507
508         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
509         err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
510         if (err)
511                 goto out;
512         /* check the pfp fw hdr version to decide whether to enable rs64 for gfx11 */
513         adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
514                                 (union amdgpu_firmware_header *)
515                                 adev->gfx.pfp_fw->data, 2, 0);
516         if (adev->gfx.rs64_enable) {
517                 dev_info(adev->dev, "CP RS64 enable\n");
518                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
519                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
520                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
521         } else {
522                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
523         }
524
525         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
526         err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
527         if (err)
528                 goto out;
529         if (adev->gfx.rs64_enable) {
530                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
531                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
532                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
533         } else {
534                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
535         }
536
537         if (!amdgpu_sriov_vf(adev)) {
538                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
539                 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
540                 if (err)
541                         goto out;
542                 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
543                 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
544                 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
545                 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
546                 if (err)
547                         goto out;
548         }
549
550         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
551         err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
552         if (err)
553                 goto out;
554         if (adev->gfx.rs64_enable) {
555                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
556                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
557                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
558                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
559                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
560         } else {
561                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
562                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
563         }
564
565         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
566                 err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
567
568         /* only one MEC for gfx 11.0.0. */
569         adev->gfx.mec2_fw = NULL;
570
571         gfx_v11_0_check_fw_cp_gfx_shadow(adev);
572 out:
573         if (err) {
574                 amdgpu_ucode_release(&adev->gfx.pfp_fw);
575                 amdgpu_ucode_release(&adev->gfx.me_fw);
576                 amdgpu_ucode_release(&adev->gfx.rlc_fw);
577                 amdgpu_ucode_release(&adev->gfx.mec_fw);
578         }
579
580         return err;
581 }
582
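/*
 * Size, in dwords, of the clear state buffer laid out by
 * gfx_v11_0_get_csb_buffer(): the BEGIN/END_CLEAR_STATE preamble packets,
 * context control, the SECT_CONTEXT register extents, the
 * PA_SC_TILE_STEERING_OVERRIDE write and the final CLEAR_STATE packet.
 */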
583 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
584 {
585         u32 count = 0;
586         const struct cs_section_def *sect = NULL;
587         const struct cs_extent_def *ext = NULL;
588
589         /* begin clear state */
590         count += 2;
591         /* context control state */
592         count += 3;
593
594         for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
595                 for (ext = sect->section; ext->extent != NULL; ++ext) {
596                         if (sect->id == SECT_CONTEXT)
597                                 count += 2 + ext->reg_count;
598                         else
599                                 return 0;
600                 }
601         }
602
603         /* set PA_SC_TILE_STEERING_OVERRIDE */
604         count += 3;
605         /* end clear state */
606         count += 2;
607         /* clear state */
608         count += 2;
609
610         return count;
611 }
612
613 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
614                                     volatile u32 *buffer)
615 {
616         u32 count = 0, i;
617         const struct cs_section_def *sect = NULL;
618         const struct cs_extent_def *ext = NULL;
619         int ctx_reg_offset;
620
621         if (adev->gfx.rlc.cs_data == NULL)
622                 return;
623         if (buffer == NULL)
624                 return;
625
626         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
627         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
628
629         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
630         buffer[count++] = cpu_to_le32(0x80000000);
631         buffer[count++] = cpu_to_le32(0x80000000);
632
633         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
634                 for (ext = sect->section; ext->extent != NULL; ++ext) {
635                         if (sect->id == SECT_CONTEXT) {
636                                 buffer[count++] =
637                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
638                                 buffer[count++] = cpu_to_le32(ext->reg_index -
639                                                 PACKET3_SET_CONTEXT_REG_START);
640                                 for (i = 0; i < ext->reg_count; i++)
641                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
642                         } else {
643                                 return;
644                         }
645                 }
646         }
647
648         ctx_reg_offset =
649                 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
650         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
651         buffer[count++] = cpu_to_le32(ctx_reg_offset);
652         buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
653
654         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
655         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
656
657         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
658         buffer[count++] = cpu_to_le32(0);
659 }
660
661 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
662 {
663         /* clear state block */
664         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
665                         &adev->gfx.rlc.clear_state_gpu_addr,
666                         (void **)&adev->gfx.rlc.cs_ptr);
667
668         /* jump table block */
669         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
670                         &adev->gfx.rlc.cp_table_gpu_addr,
671                         (void **)&adev->gfx.rlc.cp_table_ptr);
672 }
673
674 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
675 {
676         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
677
678         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
679         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
680         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
681         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
682         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
683         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
684         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
685         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
686         adev->gfx.rlc.rlcg_reg_access_supported = true;
687 }
688
689 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
690 {
691         const struct cs_section_def *cs_data;
692         int r;
693
694         adev->gfx.rlc.cs_data = gfx11_cs_data;
695
696         cs_data = adev->gfx.rlc.cs_data;
697
698         if (cs_data) {
699                 /* init clear state block */
700                 r = amdgpu_gfx_rlc_init_csb(adev);
701                 if (r)
702                         return r;
703         }
704
705         /* init spm vmid with 0xf */
706         if (adev->gfx.rlc.funcs->update_spm_vmid)
707                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
708
709         return 0;
710 }
711
712 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
713 {
714         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
715         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
716         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
717 }
718
719 static void gfx_v11_0_me_init(struct amdgpu_device *adev)
720 {
721         bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
722
723         amdgpu_gfx_graphics_queue_acquire(adev);
724 }
725
726 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
727 {
728         int r;
729         u32 *hpd;
730         size_t mec_hpd_size;
731
732         bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
733
734         /* take ownership of the relevant compute queues */
735         amdgpu_gfx_compute_queue_acquire(adev);
736         mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
737
738         if (mec_hpd_size) {
739                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
740                                               AMDGPU_GEM_DOMAIN_GTT,
741                                               &adev->gfx.mec.hpd_eop_obj,
742                                               &adev->gfx.mec.hpd_eop_gpu_addr,
743                                               (void **)&hpd);
744                 if (r) {
745                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
746                         gfx_v11_0_mec_fini(adev);
747                         return r;
748                 }
749
750                 memset(hpd, 0, mec_hpd_size);
751
752                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
753                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
754         }
755
756         return 0;
757 }
758
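/*
 * Indirect SQ register access: program SQ_IND_INDEX with the wave (and, for
 * wave_read_regs(), the thread index plus auto-increment) and read the data
 * back through SQ_IND_DATA.
 */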
759 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
760 {
761         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
762                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
763                 (address << SQ_IND_INDEX__INDEX__SHIFT));
764         return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
765 }
766
767 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
768                            uint32_t thread, uint32_t regno,
769                            uint32_t num, uint32_t *out)
770 {
771         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
772                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
773                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
774                 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
775                 (SQ_IND_INDEX__AUTO_INCR_MASK));
776         while (num--)
777                 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
778 }
779
780 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
781 {
782         /* in gfx11 the SIMD_ID is specified as part of the INSTANCE
783          * field when performing a select_se_sh so it should be
784          * zero here */
785         WARN_ON(simd != 0);
786
787         /* type 3 wave data */
788         dst[(*no_fields)++] = 3;
789         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
790         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
791         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
792         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
793         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
794         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
795         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
796         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
797         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
798         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
799         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
800         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
801         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
802         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
803         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
804 }
805
806 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
807                                      uint32_t wave, uint32_t start,
808                                      uint32_t size, uint32_t *dst)
809 {
810         WARN_ON(simd != 0);
811
812         wave_read_regs(
813                 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
814                 dst);
815 }
816
817 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
818                                       uint32_t wave, uint32_t thread,
819                                       uint32_t start, uint32_t size,
820                                       uint32_t *dst)
821 {
822         wave_read_regs(
823                 adev, wave, thread,
824                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
825 }
826
827 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
828                                         u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
829 {
830         soc21_grbm_select(adev, me, pipe, q, vm);
831 }
832
833 /* all sizes are in bytes */
834 #define MQD_SHADOW_BASE_SIZE      73728
835 #define MQD_SHADOW_BASE_ALIGNMENT 256
836 #define MQD_FWWORKAREA_SIZE       484
837 #define MQD_FWWORKAREA_ALIGNMENT  256
838
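/*
 * Report the sizes and alignments needed for the CP gfx shadow area and the
 * firmware work area; returns -ENOTSUPP when the firmware doesn't expose gfx
 * shadow support.
 */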
839 static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
840                                          struct amdgpu_gfx_shadow_info *shadow_info)
841 {
842         if (adev->gfx.cp_gfx_shadow) {
843                 shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
844                 shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
845                 shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
846                 shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
847                 return 0;
848         } else {
849                 memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
850                 return -ENOTSUPP;
851         }
852 }
853
854 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
855         .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
856         .select_se_sh = &gfx_v11_0_select_se_sh,
857         .read_wave_data = &gfx_v11_0_read_wave_data,
858         .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
859         .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
860         .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
861         .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
862         .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
863 };
864
865 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
866 {
867
868         switch (adev->ip_versions[GC_HWIP][0]) {
869         case IP_VERSION(11, 0, 0):
870         case IP_VERSION(11, 0, 2):
871                 adev->gfx.config.max_hw_contexts = 8;
872                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
873                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
874                 adev->gfx.config.sc_hiz_tile_fifo_size = 0;
875                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
876                 break;
877         case IP_VERSION(11, 0, 3):
878                 adev->gfx.ras = &gfx_v11_0_3_ras;
879                 adev->gfx.config.max_hw_contexts = 8;
880                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
881                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
882                 adev->gfx.config.sc_hiz_tile_fifo_size = 0;
883                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
884                 break;
885         case IP_VERSION(11, 0, 1):
886         case IP_VERSION(11, 0, 4):
887                 adev->gfx.config.max_hw_contexts = 8;
888                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
889                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
890                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
891                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
892                 break;
893         default:
894                 BUG();
895                 break;
896         }
897
898         return 0;
899 }
900
901 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
902                                    int me, int pipe, int queue)
903 {
904         int r;
905         struct amdgpu_ring *ring;
906         unsigned int irq_type;
907
908         ring = &adev->gfx.gfx_ring[ring_id];
909
910         ring->me = me;
911         ring->pipe = pipe;
912         ring->queue = queue;
913
914         ring->ring_obj = NULL;
915         ring->use_doorbell = true;
916
917         if (!ring_id)
918                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
919         else
920                 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
921         ring->vm_hub = AMDGPU_GFXHUB(0);
922         sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
923
924         irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
925         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
926                              AMDGPU_RING_PRIO_DEFAULT, NULL);
927         if (r)
928                 return r;
929         return 0;
930 }
931
932 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
933                                        int mec, int pipe, int queue)
934 {
935         int r;
936         unsigned irq_type;
937         struct amdgpu_ring *ring;
938         unsigned int hw_prio;
939
940         ring = &adev->gfx.compute_ring[ring_id];
941
942         /* mec0 is me1 */
943         ring->me = mec + 1;
944         ring->pipe = pipe;
945         ring->queue = queue;
946
947         ring->ring_obj = NULL;
948         ring->use_doorbell = true;
949         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
950         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
951                                 + (ring_id * GFX11_MEC_HPD_SIZE);
952         ring->vm_hub = AMDGPU_GFXHUB(0);
953         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
954
955         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
956                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
957                 + ring->pipe;
958         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
959                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
960         /* type-2 packets are deprecated on MEC, use type-3 instead */
961         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
962                              hw_prio, NULL);
963         if (r)
964                 return r;
965
966         return 0;
967 }
968
969 static struct {
970         SOC21_FIRMWARE_ID       id;
971         unsigned int            offset;
972         unsigned int            size;
973 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
974
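/*
 * Walk the table of contents provided by the TOC firmware and cache each
 * entry's id, offset and size (converted from dwords to bytes) in
 * rlc_autoload_info[] for the backdoor autoload path below.
 */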
975 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
976 {
977         RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
978
979         while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
980                         (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
981                 rlc_autoload_info[ucode->id].id = ucode->id;
982                 rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
983                 rlc_autoload_info[ucode->id].size = ucode->size * 4;
984
985                 ucode++;
986         }
987 }
988
989 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
990 {
991         uint32_t total_size = 0;
992         SOC21_FIRMWARE_ID id;
993
994         gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
995
996         for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
997                 total_size += rlc_autoload_info[id].size;
998
999         /* The offsets in the RLC TOC may be aligned (padded), so make sure the buffer still covers the last entry */
1000         if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
1001                 total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
1002                         rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
1003
1004         return total_size;
1005 }
1006
1007 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
1008 {
1009         int r;
1010         uint32_t total_size;
1011
1012         total_size = gfx_v11_0_calc_toc_total_size(adev);
1013
1014         r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
1015                                       AMDGPU_GEM_DOMAIN_VRAM |
1016                                       AMDGPU_GEM_DOMAIN_GTT,
1017                                       &adev->gfx.rlc.rlc_autoload_bo,
1018                                       &adev->gfx.rlc.rlc_autoload_gpu_addr,
1019                                       (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1020
1021         if (r) {
1022                 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
1023                 return r;
1024         }
1025
1026         return 0;
1027 }
1028
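/*
 * Copy one firmware image into the autoload bo at its TOC offset, zero-pad up
 * to the size recorded in the TOC, and set the matching bit in the 64-bit
 * autoload mask (the RS64 PFP/ME images are not marked in the mask).
 */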
1029 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1030                                               SOC21_FIRMWARE_ID id,
1031                                               const void *fw_data,
1032                                               uint32_t fw_size,
1033                                               uint32_t *fw_autoload_mask)
1034 {
1035         uint32_t toc_offset;
1036         uint32_t toc_fw_size;
1037         char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1038
1039         if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
1040                 return;
1041
1042         toc_offset = rlc_autoload_info[id].offset;
1043         toc_fw_size = rlc_autoload_info[id].size;
1044
1045         if (fw_size == 0)
1046                 fw_size = toc_fw_size;
1047
1048         if (fw_size > toc_fw_size)
1049                 fw_size = toc_fw_size;
1050
1051         memcpy(ptr + toc_offset, fw_data, fw_size);
1052
1053         if (fw_size < toc_fw_size)
1054                 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1055
1056         if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
1057                 *(uint64_t *)fw_autoload_mask |= 1ULL << id;
1058 }
1059
1060 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
1061                                                         uint32_t *fw_autoload_mask)
1062 {
1063         void *data;
1064         uint32_t size;
1065         uint64_t *toc_ptr;
1066
1067         *(uint64_t *)fw_autoload_mask |= 0x1;
1068
1069         DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
1070
1071         data = adev->psp.toc.start_addr;
1072         size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
1073
1074         toc_ptr = (uint64_t *)data + size / 8 - 1;
1075         *toc_ptr = *(uint64_t *)fw_autoload_mask;
1076
1077         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
1078                                         data, size, fw_autoload_mask);
1079 }
1080
1081 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1082                                                         uint32_t *fw_autoload_mask)
1083 {
1084         const __le32 *fw_data;
1085         uint32_t fw_size;
1086         const struct gfx_firmware_header_v1_0 *cp_hdr;
1087         const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1088         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1089         const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1090         uint16_t version_major, version_minor;
1091
1092         if (adev->gfx.rs64_enable) {
1093                 /* pfp ucode */
1094                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1095                         adev->gfx.pfp_fw->data;
1096                 /* instruction */
1097                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1098                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1099                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1100                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1101                                                 fw_data, fw_size, fw_autoload_mask);
1102                 /* data */
1103                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1104                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
1105                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1106                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1107                                                 fw_data, fw_size, fw_autoload_mask);
1108                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1109                                                 fw_data, fw_size, fw_autoload_mask);
1110                 /* me ucode */
1111                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1112                         adev->gfx.me_fw->data;
1113                 /* instruction */
1114                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1115                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1116                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1117                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1118                                                 fw_data, fw_size, fw_autoload_mask);
1119                 /* data */
1120                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1121                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
1122                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1123                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1124                                                 fw_data, fw_size, fw_autoload_mask);
1125                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1126                                                 fw_data, fw_size, fw_autoload_mask);
1127                 /* mec ucode */
1128                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1129                         adev->gfx.mec_fw->data;
1130                 /* instruction */
1131                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1132                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1133                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1134                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1135                                                 fw_data, fw_size, fw_autoload_mask);
1136                 /* data */
1137                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1138                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
1139                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1140                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1141                                                 fw_data, fw_size, fw_autoload_mask);
1142                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1143                                                 fw_data, fw_size, fw_autoload_mask);
1144                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1145                                                 fw_data, fw_size, fw_autoload_mask);
1146                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1147                                                 fw_data, fw_size, fw_autoload_mask);
1148         } else {
1149                 /* pfp ucode */
1150                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1151                         adev->gfx.pfp_fw->data;
1152                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1153                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1154                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1155                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1156                                                 fw_data, fw_size, fw_autoload_mask);
1157
1158                 /* me ucode */
1159                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1160                         adev->gfx.me_fw->data;
1161                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1162                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1163                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1164                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1165                                                 fw_data, fw_size, fw_autoload_mask);
1166
1167                 /* mec ucode */
1168                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1169                         adev->gfx.mec_fw->data;
1170                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1171                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1172                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1173                         cp_hdr->jt_size * 4;
1174                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1175                                                 fw_data, fw_size, fw_autoload_mask);
1176         }
1177
1178         /* rlc ucode */
1179         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1180                 adev->gfx.rlc_fw->data;
1181         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1182                         le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1183         fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1184         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1185                                         fw_data, fw_size, fw_autoload_mask);
1186
1187         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1188         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1189         if (version_major == 2) {
1190                 if (version_minor >= 2) {
1191                         rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1192
1193                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1194                                         le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1195                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1196                         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1197                                         fw_data, fw_size, fw_autoload_mask);
1198
1199                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1200                                         le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1201                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1202                         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1203                                         fw_data, fw_size, fw_autoload_mask);
1204                 }
1205         }
1206 }
1207
1208 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1209                                                         uint32_t *fw_autoload_mask)
1210 {
1211         const __le32 *fw_data;
1212         uint32_t fw_size;
1213         const struct sdma_firmware_header_v2_0 *sdma_hdr;
1214
1215         sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1216                 adev->sdma.instance[0].fw->data;
1217         fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1218                         le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1219         fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1220
1221         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1222                         SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1223
1224         fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1225                         le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1226         fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1227
1228         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1229                         SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1230 }
1231
1232 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1233                                                         uint32_t *fw_autoload_mask)
1234 {
1235         const __le32 *fw_data;
1236         unsigned fw_size;
1237         const struct mes_firmware_header_v1_0 *mes_hdr;
1238         int pipe, ucode_id, data_id;
1239
1240         for (pipe = 0; pipe < 2; pipe++) {
1241                 if (pipe == 0) {
1242                         ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1243                         data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1244                 } else {
1245                         ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1246                         data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1247                 }
1248
1249                 mes_hdr = (const struct mes_firmware_header_v1_0 *)
1250                         adev->mes.fw[pipe]->data;
1251
1252                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1253                                 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1254                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1255
1256                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1257                                 ucode_id, fw_data, fw_size, fw_autoload_mask);
1258
1259                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1260                                 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1261                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1262
1263                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1264                                 data_id, fw_data, fw_size, fw_autoload_mask);
1265         }
1266 }
1267
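/*
 * RLC backdoor autoload, steps 2-5: copy the SDMA, GFX (PFP/ME/MEC), MES and
 * TOC ucode images into the autoload buffer, point the IMU bootloader at the
 * RLC_G image, then load, set up and start the IMU and disable GPA mode.
 */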
1268 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1269 {
1270         uint32_t rlc_g_offset, rlc_g_size;
1271         uint64_t gpu_addr;
1272         uint32_t autoload_fw_id[2];
1273
1274         memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);
1275
1276         /* RLC autoload sequence 2: copy ucode */
1277         gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1278         gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1279         gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1280         gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1281
1282         rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1283         rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1284         gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1285
1286         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1287         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1288
1289         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1290
1291         /* RLC autoload sequence 3: load IMU fw */
1292         if (adev->gfx.imu.funcs->load_microcode)
1293                 adev->gfx.imu.funcs->load_microcode(adev);
1294         /* RLC autoload sequence 4: init IMU fw */
1295         if (adev->gfx.imu.funcs->setup_imu)
1296                 adev->gfx.imu.funcs->setup_imu(adev);
1297         if (adev->gfx.imu.funcs->start_imu)
1298                 adev->gfx.imu.funcs->start_imu(adev);
1299
1300         /* RLC autoload sequence 5: disable gpa mode */
1301         gfx_v11_0_disable_gpa_mode(adev);
1302
1303         return 0;
1304 }
1305
1306 static int gfx_v11_0_sw_init(void *handle)
1307 {
1308         int i, j, k, r, ring_id = 0;
1309         struct amdgpu_kiq *kiq;
1310         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1311
1312         adev->gfxhub.funcs->init(adev);
1313
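        /* CP ME/MEC pipe and queue topology depends on the GC IP version */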
1314         switch (adev->ip_versions[GC_HWIP][0]) {
1315         case IP_VERSION(11, 0, 0):
1316         case IP_VERSION(11, 0, 2):
1317         case IP_VERSION(11, 0, 3):
1318                 adev->gfx.me.num_me = 1;
1319                 adev->gfx.me.num_pipe_per_me = 1;
1320                 adev->gfx.me.num_queue_per_pipe = 1;
1321                 adev->gfx.mec.num_mec = 2;
1322                 adev->gfx.mec.num_pipe_per_mec = 4;
1323                 adev->gfx.mec.num_queue_per_pipe = 4;
1324                 break;
1325         case IP_VERSION(11, 0, 1):
1326         case IP_VERSION(11, 0, 4):
1327                 adev->gfx.me.num_me = 1;
1328                 adev->gfx.me.num_pipe_per_me = 1;
1329                 adev->gfx.me.num_queue_per_pipe = 1;
1330                 adev->gfx.mec.num_mec = 1;
1331                 adev->gfx.mec.num_pipe_per_mec = 4;
1332                 adev->gfx.mec.num_queue_per_pipe = 4;
1333                 break;
1334         default:
1335                 adev->gfx.me.num_me = 1;
1336                 adev->gfx.me.num_pipe_per_me = 1;
1337                 adev->gfx.me.num_queue_per_pipe = 1;
1338                 adev->gfx.mec.num_mec = 1;
1339                 adev->gfx.mec.num_pipe_per_mec = 4;
1340                 adev->gfx.mec.num_queue_per_pipe = 8;
1341                 break;
1342         }
1343
1344         /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */
1345         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3) &&
1346                 amdgpu_sriov_is_pp_one_vf(adev))
1347                 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
1348
1349         /* EOP Event */
1350         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1351                               GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1352                               &adev->gfx.eop_irq);
1353         if (r)
1354                 return r;
1355
1356         /* Privileged reg */
1357         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1358                               GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1359                               &adev->gfx.priv_reg_irq);
1360         if (r)
1361                 return r;
1362
1363         /* Privileged inst */
1364         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1365                               GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1366                               &adev->gfx.priv_inst_irq);
1367         if (r)
1368                 return r;
1369
1370         /* FED error */
1371         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
1372                                   GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
1373                                   &adev->gfx.rlc_gc_fed_irq);
1374         if (r)
1375                 return r;
1376
1377         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1378
1379         if (adev->gfx.imu.funcs) {
1380                 if (adev->gfx.imu.funcs->init_microcode) {
1381                         r = adev->gfx.imu.funcs->init_microcode(adev);
1382                         if (r)
1383                                 DRM_ERROR("Failed to load imu firmware!\n");
1384                 }
1385         }
1386
1387         gfx_v11_0_me_init(adev);
1388
1389         r = gfx_v11_0_rlc_init(adev);
1390         if (r) {
1391                 DRM_ERROR("Failed to init rlc BOs!\n");
1392                 return r;
1393         }
1394
1395         r = gfx_v11_0_mec_init(adev);
1396         if (r) {
1397                 DRM_ERROR("Failed to init MEC BOs!\n");
1398                 return r;
1399         }
1400
1401         /* set up the gfx ring */
1402         for (i = 0; i < adev->gfx.me.num_me; i++) {
1403                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1404                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1405                                 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1406                                         continue;
1407
1408                                 r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1409                                                             i, k, j);
1410                                 if (r)
1411                                         return r;
1412                                 ring_id++;
1413                         }
1414                 }
1415         }
1416
1417         ring_id = 0;
1418         /* set up the compute queues - allocate horizontally across pipes */
1419         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1420                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1421                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1422                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
1423                                                                      k, j))
1424                                         continue;
1425
1426                                 r = gfx_v11_0_compute_ring_init(adev, ring_id,
1427                                                                 i, k, j);
1428                                 if (r)
1429                                         return r;
1430
1431                                 ring_id++;
1432                         }
1433                 }
1434         }
1435
1436         if (!adev->enable_mes_kiq) {
1437                 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
1438                 if (r) {
1439                         DRM_ERROR("Failed to init KIQ BOs!\n");
1440                         return r;
1441                 }
1442
1443                 kiq = &adev->gfx.kiq[0];
1444                 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
1445                 if (r)
1446                         return r;
1447         }
1448
1449         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
1450         if (r)
1451                 return r;
1452
1453         /* allocate visible FB for rlc auto-loading fw */
1454         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1455                 r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1456                 if (r)
1457                         return r;
1458         }
1459
1460         r = gfx_v11_0_gpu_early_init(adev);
1461         if (r)
1462                 return r;
1463
1464         if (amdgpu_gfx_ras_sw_init(adev)) {
1465                 dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
1466                 return -EINVAL;
1467         }
1468
1469         return 0;
1470 }
1471
1472 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1473 {
1474         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1475                               &adev->gfx.pfp.pfp_fw_gpu_addr,
1476                               (void **)&adev->gfx.pfp.pfp_fw_ptr);
1477
1478         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1479                               &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1480                               (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1481 }
1482
1483 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1484 {
1485         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1486                               &adev->gfx.me.me_fw_gpu_addr,
1487                               (void **)&adev->gfx.me.me_fw_ptr);
1488
1489         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1490                                &adev->gfx.me.me_fw_data_gpu_addr,
1491                                (void **)&adev->gfx.me.me_fw_data_ptr);
1492 }
1493
1494 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1495 {
1496         amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1497                         &adev->gfx.rlc.rlc_autoload_gpu_addr,
1498                         (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1499 }
1500
1501 static int gfx_v11_0_sw_fini(void *handle)
1502 {
1503         int i;
1504         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1505
1506         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1507                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1508         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1509                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1510
1511         amdgpu_gfx_mqd_sw_fini(adev, 0);
1512
1513         if (!adev->enable_mes_kiq) {
1514                 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
1515                 amdgpu_gfx_kiq_fini(adev, 0);
1516         }
1517
1518         gfx_v11_0_pfp_fini(adev);
1519         gfx_v11_0_me_fini(adev);
1520         gfx_v11_0_rlc_fini(adev);
1521         gfx_v11_0_mec_fini(adev);
1522
1523         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1524                 gfx_v11_0_rlc_autoload_buffer_fini(adev);
1525
1526         gfx_v11_0_free_microcode(adev);
1527
1528         return 0;
1529 }
1530
1531 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1532                                    u32 sh_num, u32 instance, int xcc_id)
1533 {
1534         u32 data;
1535
1536         if (instance == 0xffffffff)
1537                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1538                                      INSTANCE_BROADCAST_WRITES, 1);
1539         else
1540                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1541                                      instance);
1542
1543         if (se_num == 0xffffffff)
1544                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1545                                      1);
1546         else
1547                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1548
1549         if (sh_num == 0xffffffff)
1550                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1551                                      1);
1552         else
1553                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1554
1555         WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1556 }
1557
1558 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
1559 {
1560         u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
1561
1562         gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
1563         gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
1564                                            CC_GC_SA_UNIT_DISABLE,
1565                                            SA_DISABLE);
1566         gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE);
1567         gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
1568                                                  GC_USER_SA_UNIT_DISABLE,
1569                                                  SA_DISABLE);
1570         sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
1571                                             adev->gfx.config.max_shader_engines);
1572
1573         return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
1574 }
1575
1576 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1577 {
1578         u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
1579         u32 rb_mask;
1580
1581         gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1582         gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
1583                                             CC_RB_BACKEND_DISABLE,
1584                                             BACKEND_DISABLE);
1585         gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1586         gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
1587                                                  GC_USER_RB_BACKEND_DISABLE,
1588                                                  BACKEND_DISABLE);
1589         rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
1590                                             adev->gfx.config.max_shader_engines);
1591
1592         return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
1593 }
1594
1595 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
1596 {
1597         u32 rb_bitmap_width_per_sa;
1598         u32 max_sa;
1599         u32 active_sa_bitmap;
1600         u32 global_active_rb_bitmap;
1601         u32 active_rb_bitmap = 0;
1602         u32 i;
1603
1604         /* query sa bitmap from SA_UNIT_DISABLE registers */
1605         active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
1606         /* query rb bitmap from RB_BACKEND_DISABLE registers */
1607         global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
1608
1609         /* generate active rb bitmap according to active sa bitmap */
1610         max_sa = adev->gfx.config.max_shader_engines *
1611                  adev->gfx.config.max_sh_per_se;
1612         rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
1613                                  adev->gfx.config.max_sh_per_se;
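        /* each active SA contributes a pair of RBs (0x3), spaced
         * rb_bitmap_width_per_sa bits apart in the bitmap
         */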
1614         for (i = 0; i < max_sa; i++) {
1615                 if (active_sa_bitmap & (1 << i))
1616                         active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
1617         }
1618
1619         active_rb_bitmap |= global_active_rb_bitmap;
1620         adev->gfx.config.backend_enable_mask = active_rb_bitmap;
1621         adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
1622 }
1623
1624 #define DEFAULT_SH_MEM_BASES    (0x6000)
1625 #define LDS_APP_BASE           0x1
1626 #define SCRATCH_APP_BASE       0x2
1627
1628 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
1629 {
1630         int i;
1631         uint32_t sh_mem_bases;
1632         uint32_t data;
1633
1634         /*
1635          * Configure apertures:
1636          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1637          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1638          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1639          */
1640         sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1641                         SCRATCH_APP_BASE;
1642
1643         mutex_lock(&adev->srbm_mutex);
1644         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1645                 soc21_grbm_select(adev, 0, 0, 0, i);
1646                 /* CP and shaders */
1647                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1648                 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1649
1650                 /* Enable trap for each kfd vmid. */
1651                 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1652                 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
1653                 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
1654         }
1655         soc21_grbm_select(adev, 0, 0, 0, 0);
1656         mutex_unlock(&adev->srbm_mutex);
1657
1658         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
1659            access. These should be enabled by FW for target VMIDs. */
1660         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1661                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
1662                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
1663                 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
1664                 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
1665         }
1666 }
1667
1668 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
1669 {
1670         int vmid;
1671
1672         /*
1673          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
1674          * access. Compute VMIDs should be enabled by FW for target VMIDs,
1675          * the driver can enable them for graphics. VMID0 should maintain
1676          * access so that HWS firmware can save/restore entries.
1677          */
1678         for (vmid = 1; vmid < 16; vmid++) {
1679                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
1680                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
1681                 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
1682                 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
1683         }
1684 }
1685
1686 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
1687 {
1688         /* TODO: harvest feature to be added later. */
1689 }
1690
1691 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
1692 {
1693         /* TCCs are global (not instanced). */
1694         uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
1695                                RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
1696
1697         adev->gfx.config.tcc_disabled_mask =
1698                 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
1699                 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
1700 }
1701
1702 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
1703 {
1704         u32 tmp;
1705         int i;
1706
1707         if (!amdgpu_sriov_vf(adev))
1708                 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1709
1710         gfx_v11_0_setup_rb(adev);
1711         gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
1712         gfx_v11_0_get_tcc_info(adev);
1713         adev->gfx.config.pa_sc_tile_steering_override = 0;
1714
1715         /* Set whether texture coordinate truncation is conformant. */
1716         tmp = RREG32_SOC15(GC, 0, regTA_CNTL2);
1717         adev->gfx.config.ta_cntl2_truncate_coord_mode =
1718                 REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE);
1719
1720         /* XXX SH_MEM regs */
1721         /* where to put LDS, scratch, GPUVM in FSA64 space */
1722         mutex_lock(&adev->srbm_mutex);
1723         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
1724                 soc21_grbm_select(adev, 0, 0, 0, i);
1725                 /* CP and shaders */
1726                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1727                 if (i != 0) {
1728                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1729                                 (adev->gmc.private_aperture_start >> 48));
1730                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1731                                 (adev->gmc.shared_aperture_start >> 48));
1732                         WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
1733                 }
1734         }
1735         soc21_grbm_select(adev, 0, 0, 0, 0);
1736
1737         mutex_unlock(&adev->srbm_mutex);
1738
1739         gfx_v11_0_init_compute_vmid(adev);
1740         gfx_v11_0_init_gds_vmid(adev);
1741 }
1742
1743 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1744                                                bool enable)
1745 {
1746         u32 tmp;
1747
1748         if (amdgpu_sriov_vf(adev))
1749                 return;
1750
1751         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
1752
1753         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1754                             enable ? 1 : 0);
1755         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1756                             enable ? 1 : 0);
1757         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1758                             enable ? 1 : 0);
1759         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1760                             enable ? 1 : 0);
1761
1762         WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
1763 }
1764
1765 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
1766 {
1767         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1768
1769         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
1770                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
1771         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
1772                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1773         WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1774
1775         return 0;
1776 }
1777
1778 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
1779 {
1780         u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
1781
1782         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1783         WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
1784 }
1785
1786 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
1787 {
1788         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1789         udelay(50);
1790         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1791         udelay(50);
1792 }
1793
1794 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1795                                              bool enable)
1796 {
1797         uint32_t rlc_pg_cntl;
1798
1799         rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
1800
1801         if (!enable) {
1802                 /* RLC_PG_CNTL[23] = 0 (default)
1803                  * RLC will wait for handshake acks with SMU
1804                  * GFXOFF will be enabled
1805                  * RLC_PG_CNTL[23] = 1
1806                  * RLC will not issue any message to SMU
1807                  * hence no handshake between SMU & RLC
1808                  * GFXOFF will be disabled
1809                  */
1810                 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1811         } else
1812                 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1813         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
1814 }
1815
1816 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
1817 {
1818         /* TODO: the RLC & SMU handshake is disabled until SMU
1819          * and the gfxoff feature work as expected */
1820         if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1821                 gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
1822
1823         WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1824         udelay(50);
1825 }
1826
1827 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
1828 {
1829         uint32_t tmp;
1830
1831         /* enable Save Restore Machine */
1832         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
1833         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1834         tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1835         WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
1836 }
1837
1838 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
1839 {
1840         const struct rlc_firmware_header_v2_0 *hdr;
1841         const __le32 *fw_data;
1842         unsigned i, fw_size;
1843
1844         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1845         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1846                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1847         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1848
1849         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
1850                      RLCG_UCODE_LOADING_START_ADDRESS);
1851
1852         for (i = 0; i < fw_size; i++)
1853                 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
1854                              le32_to_cpup(fw_data++));
1855
1856         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
1857 }
1858
1859 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
1860 {
1861         const struct rlc_firmware_header_v2_2 *hdr;
1862         const __le32 *fw_data;
1863         unsigned i, fw_size;
1864         u32 tmp;
1865
1866         hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1867
1868         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1869                         le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1870         fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1871
1872         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
1873
1874         for (i = 0; i < fw_size; i++) {
1875                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1876                         msleep(1);
1877                 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
1878                                 le32_to_cpup(fw_data++));
1879         }
1880
1881         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1882
1883         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1884                         le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1885         fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1886
1887         WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
1888         for (i = 0; i < fw_size; i++) {
1889                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1890                         msleep(1);
1891                 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
1892                                 le32_to_cpup(fw_data++));
1893         }
1894
1895         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1896
1897         tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
1898         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1899         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1900         WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
1901 }
1902
1903 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
1904 {
1905         const struct rlc_firmware_header_v2_3 *hdr;
1906         const __le32 *fw_data;
1907         unsigned i, fw_size;
1908         u32 tmp;
1909
1910         hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
1911
1912         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1913                         le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
1914         fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
1915
1916         WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
1917
1918         for (i = 0; i < fw_size; i++) {
1919                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1920                         msleep(1);
1921                 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
1922                                 le32_to_cpup(fw_data++));
1923         }
1924
1925         WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
1926
1927         tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
1928         tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
1929         WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
1930
1931         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1932                         le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
1933         fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
1934
1935         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
1936
1937         for (i = 0; i < fw_size; i++) {
1938                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1939                         msleep(1);
1940                 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
1941                                 le32_to_cpup(fw_data++));
1942         }
1943
1944         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
1945
1946         tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
1947         tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
1948         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
1949 }
1950
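/*
 * Legacy (direct) RLC microcode load: v2.0 headers carry the RLCG image;
 * the LX6 IRAM/DRAM images (v2.2+) and the RLCP/RLCV images (v2.3) are
 * only loaded when dpm is enabled.
 */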
1951 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
1952 {
1953         const struct rlc_firmware_header_v2_0 *hdr;
1954         uint16_t version_major;
1955         uint16_t version_minor;
1956
1957         if (!adev->gfx.rlc_fw)
1958                 return -EINVAL;
1959
1960         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1961         amdgpu_ucode_print_rlc_hdr(&hdr->header);
1962
1963         version_major = le16_to_cpu(hdr->header.header_version_major);
1964         version_minor = le16_to_cpu(hdr->header.header_version_minor);
1965
1966         if (version_major == 2) {
1967                 gfx_v11_0_load_rlcg_microcode(adev);
1968                 if (amdgpu_dpm == 1) {
1969                         if (version_minor >= 2)
1970                                 gfx_v11_0_load_rlc_iram_dram_microcode(adev);
1971                         if (version_minor == 3)
1972                                 gfx_v11_0_load_rlcp_rlcv_microcode(adev);
1973                 }
1974
1975                 return 0;
1976         }
1977
1978         return -EINVAL;
1979 }
1980
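/*
 * RLC resume: with PSP front-door loading only the CSB and SRM need to be
 * (re)programmed; otherwise stop the RLC, disable CG/PG, optionally load the
 * RLC microcode directly, reprogram the CSB and restart the RLC.
 */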
1981 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
1982 {
1983         int r;
1984
1985         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1986                 gfx_v11_0_init_csb(adev);
1987
1988                 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
1989                         gfx_v11_0_rlc_enable_srm(adev);
1990         } else {
1991                 if (amdgpu_sriov_vf(adev)) {
1992                         gfx_v11_0_init_csb(adev);
1993                         return 0;
1994                 }
1995
1996                 adev->gfx.rlc.funcs->stop(adev);
1997
1998                 /* disable CG */
1999                 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
2000
2001                 /* disable PG */
2002                 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
2003
2004                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2005                         /* legacy rlc firmware loading */
2006                         r = gfx_v11_0_rlc_load_microcode(adev);
2007                         if (r)
2008                                 return r;
2009                 }
2010
2011                 gfx_v11_0_init_csb(adev);
2012
2013                 adev->gfx.rlc.funcs->start(adev);
2014         }
2015         return 0;
2016 }
2017
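/*
 * The three legacy cache-config helpers below share the same pattern:
 * invalidate the L1 instruction cache, poll until the invalidation
 * completes, then program the cache base address/control registers with
 * the ucode location.
 */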
2018 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
2019 {
2020         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2021         uint32_t tmp;
2022         int i;
2023
2024         /* Trigger an invalidation of the L1 instruction caches */
2025         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2026         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2027         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2028
2029         /* Wait for invalidation complete */
2030         for (i = 0; i < usec_timeout; i++) {
2031                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2032                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2033                                         INVALIDATE_CACHE_COMPLETE))
2034                         break;
2035                 udelay(1);
2036         }
2037
2038         if (i >= usec_timeout) {
2039                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2040                 return -EINVAL;
2041         }
2042
2043         if (amdgpu_emu_mode == 1)
2044                 adev->hdp.funcs->flush_hdp(adev, NULL);
2045
2046         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2047         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2048         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2049         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2050         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2051         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2052
2053                 /* Program me ucode address into instruction cache address register */
2054         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2055                         lower_32_bits(addr) & 0xFFFFF000);
2056         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2057                         upper_32_bits(addr));
2058
2059         return 0;
2060 }
2061
2062 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
2063 {
2064         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2065         uint32_t tmp;
2066         int i;
2067
2068         /* Trigger an invalidation of the L1 instruction caches */
2069         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2070         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2071         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2072
2073         /* Wait for invalidation complete */
2074         for (i = 0; i < usec_timeout; i++) {
2075                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2076                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2077                                         INVALIDATE_CACHE_COMPLETE))
2078                         break;
2079                 udelay(1);
2080         }
2081
2082         if (i >= usec_timeout) {
2083                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2084                 return -EINVAL;
2085         }
2086
2087         if (amdgpu_emu_mode == 1)
2088                 adev->hdp.funcs->flush_hdp(adev, NULL);
2089
2090         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2091         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2092         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2093         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2094         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2095         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2096
2097         /* Program pfp ucode address into instruction cache address register */
2098         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2099                         lower_32_bits(addr) & 0xFFFFF000);
2100         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2101                         upper_32_bits(addr));
2102
2103         return 0;
2104 }
2105
2106 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2107 {
2108         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2109         uint32_t tmp;
2110         int i;
2111
2112         /* Trigger an invalidation of the L1 instruction caches */
2113         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2114         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2115
2116         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2117
2118         /* Wait for invalidation complete */
2119         for (i = 0; i < usec_timeout; i++) {
2120                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2121                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2122                                         INVALIDATE_CACHE_COMPLETE))
2123                         break;
2124                 udelay(1);
2125         }
2126
2127         if (i >= usec_timeout) {
2128                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2129                 return -EINVAL;
2130         }
2131
2132         if (amdgpu_emu_mode == 1)
2133                 adev->hdp.funcs->flush_hdp(adev, NULL);
2134
2135         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2136         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2137         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2138         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2139         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2140
2141         /* Program mec1 ucode address into instruction cache address register */
2142         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2143                         lower_32_bits(addr) & 0xFFFFF000);
2144         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2145                         upper_32_bits(addr));
2146
2147         return 0;
2148 }
2149
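/*
 * RS64 PFP cache setup: program the instruction cache base (addr), wait for
 * the implicit invalidation, prime the I$, then per pipe program the ucode
 * start address and the data cache base (addr2) while pulsing the pipe
 * reset, and finally invalidate the RS64 data cache.
 */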
2150 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2151 {
2152         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2153         uint32_t tmp;
2154         unsigned i, pipe_id;
2155         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2156
2157         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2158                 adev->gfx.pfp_fw->data;
2159
2160         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2161                 lower_32_bits(addr));
2162         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2163                 upper_32_bits(addr));
2164
2165         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2166         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2167         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2168         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2169         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2170
2171         /*
2172          * Programming any of the CP_PFP_IC_BASE registers
2173          * forces invalidation of the PFP L1 I$. Wait for the
2174          * invalidation to complete.
2175          */
2176         for (i = 0; i < usec_timeout; i++) {
2177                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2178                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2179                         INVALIDATE_CACHE_COMPLETE))
2180                         break;
2181                 udelay(1);
2182         }
2183
2184         if (i >= usec_timeout) {
2185                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2186                 return -EINVAL;
2187         }
2188
2189         /* Prime the L1 instruction caches */
2190         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2191         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2192         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2193         /* Wait for the instruction cache to be primed */
2194         for (i = 0; i < usec_timeout; i++) {
2195                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2196                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2197                         ICACHE_PRIMED))
2198                         break;
2199                 udelay(1);
2200         }
2201
2202         if (i >= usec_timeout) {
2203                 dev_err(adev->dev, "failed to prime instruction cache\n");
2204                 return -EINVAL;
2205         }
2206
2207         mutex_lock(&adev->srbm_mutex);
2208         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2209                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2210                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2211                         (pfp_hdr->ucode_start_addr_hi << 30) |
2212                         (pfp_hdr->ucode_start_addr_lo >> 2));
2213                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2214                         pfp_hdr->ucode_start_addr_hi >> 2);
2215
2216                 /*
2217                  * Program CP_ME_CNTL to reset the given PIPE so that
2218                  * CP_PFP_PRGRM_CNTR_START takes effect.
2219                  */
2220                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2221                 if (pipe_id == 0)
2222                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2223                                         PFP_PIPE0_RESET, 1);
2224                 else
2225                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2226                                         PFP_PIPE1_RESET, 1);
2227                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2228
2229                 /* Clear the pfp pipe reset bit. */
2230                 if (pipe_id == 0)
2231                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2232                                         PFP_PIPE0_RESET, 0);
2233                 else
2234                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2235                                         PFP_PIPE1_RESET, 0);
2236                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2237
2238                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2239                         lower_32_bits(addr2));
2240                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2241                         upper_32_bits(addr2));
2242         }
2243         soc21_grbm_select(adev, 0, 0, 0, 0);
2244         mutex_unlock(&adev->srbm_mutex);
2245
2246         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2247         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2248         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2249         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2250
2251         /* Invalidate the data caches */
2252         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2253         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2254         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2255
2256         for (i = 0; i < usec_timeout; i++) {
2257                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2258                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2259                         INVALIDATE_DCACHE_COMPLETE))
2260                         break;
2261                 udelay(1);
2262         }
2263
2264         if (i >= usec_timeout) {
2265                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2266                 return -EINVAL;
2267         }
2268
2269         return 0;
2270 }
2271
2272 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2273 {
2274         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2275         uint32_t tmp;
2276         unsigned i, pipe_id;
2277         const struct gfx_firmware_header_v2_0 *me_hdr;
2278
2279         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2280                 adev->gfx.me_fw->data;
2281
2282         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2283                 lower_32_bits(addr));
2284         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2285                 upper_32_bits(addr));
2286
2287         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2288         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2289         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2290         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2291         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2292
2293         /*
2294          * Programming any of the CP_ME_IC_BASE registers
2295          * forces invalidation of the ME L1 I$. Wait for the
2296          * invalidation to complete.
2297          */
2298         for (i = 0; i < usec_timeout; i++) {
2299                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2300                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2301                         INVALIDATE_CACHE_COMPLETE))
2302                         break;
2303                 udelay(1);
2304         }
2305
2306         if (i >= usec_timeout) {
2307                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2308                 return -EINVAL;
2309         }
2310
2311         /* Prime the instruction caches */
2312         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2313         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2314         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2315
2316         /* Wait for the instruction cache to be primed */
2317         for (i = 0; i < usec_timeout; i++) {
2318                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2319                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2320                         ICACHE_PRIMED))
2321                         break;
2322                 udelay(1);
2323         }
2324
2325         if (i >= usec_timeout) {
2326                 dev_err(adev->dev, "failed to prime instruction cache\n");
2327                 return -EINVAL;
2328         }
2329
2330         mutex_lock(&adev->srbm_mutex);
2331         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2332                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2333                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2334                         (me_hdr->ucode_start_addr_hi << 30) |
2335                         (me_hdr->ucode_start_addr_lo >> 2) );
2336                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2337                         me_hdr->ucode_start_addr_hi>>2);
2338
2339                 /*
2340                  * Program CP_ME_CNTL to reset the given PIPE so that
2341                  * CP_ME_PRGRM_CNTR_START takes effect.
2342                  */
2343                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2344                 if (pipe_id == 0)
2345                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2346                                         ME_PIPE0_RESET, 1);
2347                 else
2348                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2349                                         ME_PIPE1_RESET, 1);
2350                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2351
2352                 /* Clear the me pipe reset bit. */
2353                 if (pipe_id == 0)
2354                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2355                                         ME_PIPE0_RESET, 0);
2356                 else
2357                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2358                                         ME_PIPE1_RESET, 0);
2359                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2360
2361                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2362                         lower_32_bits(addr2));
2363                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2364                         upper_32_bits(addr2));
2365         }
2366         soc21_grbm_select(adev, 0, 0, 0, 0);
2367         mutex_unlock(&adev->srbm_mutex);
2368
2369         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2370         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2371         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2372         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2373
2374         /* Invalidate the data caches */
2375         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2376         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2377         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2378
2379         for (i = 0; i < usec_timeout; i++) {
2380                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2381                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2382                         INVALIDATE_DCACHE_COMPLETE))
2383                         break;
2384                 udelay(1);
2385         }
2386
2387         if (i >= usec_timeout) {
2388                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2389                 return -EINVAL;
2390         }
2391
2392         return 0;
2393 }
2394
2395 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2396 {
2397         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2398         uint32_t tmp;
2399         unsigned i;
2400         const struct gfx_firmware_header_v2_0 *mec_hdr;
2401
2402         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2403                 adev->gfx.mec_fw->data;
2404
2405         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2406         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2407         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2408         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2409         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2410
2411         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2412         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2413         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2414         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2415
2416         mutex_lock(&adev->srbm_mutex);
2417         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2418                 soc21_grbm_select(adev, 1, i, 0, 0);
2419
2420                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
2421                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2422                      upper_32_bits(addr2));
2423
2424                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2425                                         mec_hdr->ucode_start_addr_lo >> 2 |
2426                                         mec_hdr->ucode_start_addr_hi << 30);
2427                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2428                                         mec_hdr->ucode_start_addr_hi >> 2);
2429
2430                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
2431                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2432                      upper_32_bits(addr));
2433         }
2434         mutex_unlock(&adev->srbm_mutex);
2435         soc21_grbm_select(adev, 0, 0, 0, 0);
2436
2437         /* Trigger an invalidation of the MEC data cache */
2438         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2439         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2440         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2441
2442         /* Wait for invalidation complete */
2443         for (i = 0; i < usec_timeout; i++) {
2444                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2445                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2446                                        INVALIDATE_DCACHE_COMPLETE))
2447                         break;
2448                 udelay(1);
2449         }
2450
2451         if (i >= usec_timeout) {
2452                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2453                 return -EINVAL;
2454         }
2455
2456         /* Trigger an invalidation of the L1 instruction caches */
2457         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2458         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2459         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2460
2461         /* Wait for invalidation complete */
2462         for (i = 0; i < usec_timeout; i++) {
2463                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2464                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2465                                        INVALIDATE_CACHE_COMPLETE))
2466                         break;
2467                 udelay(1);
2468         }
2469
2470         if (i >= usec_timeout) {
2471                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2472                 return -EINVAL;
2473         }
2474
2475         return 0;
2476 }
2477
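/*
 * Program the RS64 PFP/ME/MEC program-counter start addresses for every pipe
 * and pulse the corresponding pipe resets so the new start addresses take
 * effect.
 */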
2478 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2479 {
2480         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2481         const struct gfx_firmware_header_v2_0 *me_hdr;
2482         const struct gfx_firmware_header_v2_0 *mec_hdr;
2483         uint32_t pipe_id, tmp;
2484
2485         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2486                 adev->gfx.mec_fw->data;
2487         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2488                 adev->gfx.me_fw->data;
2489         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2490                 adev->gfx.pfp_fw->data;
2491
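        /* ucode_start_addr is programmed as a dword address:
         * ((hi << 32) | lo) >> 2, split across the LO/HI registers
         */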
2492         /* config pfp program start addr */
2493         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2494                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2495                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2496                         (pfp_hdr->ucode_start_addr_hi << 30) |
2497                         (pfp_hdr->ucode_start_addr_lo >> 2));
2498                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2499                         pfp_hdr->ucode_start_addr_hi >> 2);
2500         }
2501         soc21_grbm_select(adev, 0, 0, 0, 0);
2502
2503         /* reset pfp pipe */
2504         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2505         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
2506         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
2507         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2508
2509         /* clear pfp pipe reset */
2510         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2511         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2512         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2513
2514         /* config me program start addr */
2515         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2516                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2517                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2518                         (me_hdr->ucode_start_addr_hi << 30) |
2519                         (me_hdr->ucode_start_addr_lo >> 2));
2520                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2521                         me_hdr->ucode_start_addr_hi >> 2);
2522         }
2523         soc21_grbm_select(adev, 0, 0, 0, 0);
2524
2525         /* reset me pipe */
2526         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2527         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2528         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2529         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2530
2531         /* clear me pipe reset */
2532         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2533         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2534         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2535
2536         /* config mec program start addr */
2537         for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2538                 soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2539                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2540                                         mec_hdr->ucode_start_addr_lo >> 2 |
2541                                         mec_hdr->ucode_start_addr_hi << 30);
2542                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2543                                         mec_hdr->ucode_start_addr_hi >> 2);
2544         }
2545         soc21_grbm_select(adev, 0, 0, 0, 0);
2546
2547         /* reset mec pipe */
2548         tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2549         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
2550         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
2551         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
2552         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
2553         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2554
2555         /* clear mec pipe reset */
2556         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
2557         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
2558         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
2559         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
2560         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2561 }
2562
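/*
 * Poll until the RLC reports that autoload of the GC firmware has finished
 * (CP_STAT idle and BOOTLOAD_COMPLETE set).  For the RLC backdoor autoload
 * path, then point the CP instruction/data caches at the corresponding
 * firmware images inside the RLC autoload buffer; the RS64 and legacy
 * layouts use different firmware IDs.
 */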
2563 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2564 {
2565         uint32_t cp_status;
2566         uint32_t bootload_status;
2567         int i, r;
2568         uint64_t addr, addr2;
2569
2570         for (i = 0; i < adev->usec_timeout; i++) {
2571                 cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2572
2573                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) ||
2574                                 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4))
2575                         bootload_status = RREG32_SOC15(GC, 0,
2576                                         regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
2577                 else
2578                         bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2579
2580                 if ((cp_status == 0) &&
2581                     (REG_GET_FIELD(bootload_status,
2582                         RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2583                         break;
2584                 }
2585                 udelay(1);
2586         }
2587
2588         if (i >= adev->usec_timeout) {
2589                 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2590                 return -ETIMEDOUT;
2591         }
2592
2593         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2594                 if (adev->gfx.rs64_enable) {
2595                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2596                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
2597                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2598                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
2599                         r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
2600                         if (r)
2601                                 return r;
2602                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2603                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
2604                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2605                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
2606                         r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
2607                         if (r)
2608                                 return r;
2609                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2610                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
2611                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2612                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
2613                         r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
2614                         if (r)
2615                                 return r;
2616                 } else {
2617                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2618                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
2619                         r = gfx_v11_0_config_me_cache(adev, addr);
2620                         if (r)
2621                                 return r;
2622                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2623                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
2624                         r = gfx_v11_0_config_pfp_cache(adev, addr);
2625                         if (r)
2626                                 return r;
2627                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2628                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
2629                         r = gfx_v11_0_config_mec_cache(adev, addr);
2630                         if (r)
2631                                 return r;
2632                 }
2633         }
2634
2635         return 0;
2636 }
2637
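/*
 * Halt or un-halt the gfx CP (ME and PFP) and wait for CP_STAT to read back
 * zero; only an error message is emitted if the CP does not settle in time.
 */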
2638 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2639 {
2640         int i;
2641         u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2642
2643         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2644         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2645         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2646
2647         for (i = 0; i < adev->usec_timeout; i++) {
2648                 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
2649                         break;
2650                 udelay(1);
2651         }
2652
2653         if (i >= adev->usec_timeout)
2654                 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2655
2656         return 0;
2657 }
2658
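/*
 * Legacy (non-RS64) PFP load: copy the ucode into a GTT buffer, point the
 * PFP instruction cache at it and write the jump table through
 * CP_HYP_PFP_UCODE_ADDR/DATA.
 */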
2659 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
2660 {
2661         int r;
2662         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2663         const __le32 *fw_data;
2664         unsigned i, fw_size;
2665
2666         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2667                 adev->gfx.pfp_fw->data;
2668
2669         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2670
2671         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2672                 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2673         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
2674
2675         r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
2676                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2677                                       &adev->gfx.pfp.pfp_fw_obj,
2678                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2679                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2680         if (r) {
2681                 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
2682                 gfx_v11_0_pfp_fini(adev);
2683                 return r;
2684         }
2685
2686         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
2687
2688         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2689         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2690
2691         gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
2692
2693         WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
2694
2695         for (i = 0; i < pfp_hdr->jt_size; i++)
2696                 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
2697                              le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
2698
2699         WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2700
2701         return 0;
2702 }
2703
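/*
 * RS64 PFP load: the v2_0 image carries separate instruction and data
 * sections, each copied into its own 64KB-aligned buffer.  The instruction
 * cache is invalidated and primed, then each PFP pipe gets its
 * program-counter start address and data-cache base while being pulsed
 * through reset.
 */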
2704 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
2705 {
2706         int r;
2707         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2708         const __le32 *fw_ucode, *fw_data;
2709         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2710         uint32_t tmp;
2711         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2712
2713         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2714                 adev->gfx.pfp_fw->data;
2715
2716         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2717
2718         /* instruction */
2719         fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
2720                 le32_to_cpu(pfp_hdr->ucode_offset_bytes));
2721         fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
2722         /* data */
2723         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2724                 le32_to_cpu(pfp_hdr->data_offset_bytes));
2725         fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
2726
2727         /* 64kb align */
2728         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2729                                       64 * 1024,
2730                                       AMDGPU_GEM_DOMAIN_VRAM |
2731                                       AMDGPU_GEM_DOMAIN_GTT,
2732                                       &adev->gfx.pfp.pfp_fw_obj,
2733                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2734                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2735         if (r) {
2736                 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2737                 gfx_v11_0_pfp_fini(adev);
2738                 return r;
2739         }
2740
2741         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2742                                       64 * 1024,
2743                                       AMDGPU_GEM_DOMAIN_VRAM |
2744                                       AMDGPU_GEM_DOMAIN_GTT,
2745                                       &adev->gfx.pfp.pfp_fw_data_obj,
2746                                       &adev->gfx.pfp.pfp_fw_data_gpu_addr,
2747                                       (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2748         if (r) {
2749                 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2750                 gfx_v11_0_pfp_fini(adev);
2751                 return r;
2752         }
2753
2754         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
2755         memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
2756
2757         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2758         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
2759         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2760         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2761
2762         if (amdgpu_emu_mode == 1)
2763                 adev->hdp.funcs->flush_hdp(adev, NULL);
2764
2765         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2766                 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2767         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2768                 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2769
2770         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2771         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2772         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2773         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2774         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2775
2776         /*
2777          * Programming any of the CP_PFP_IC_BASE registers
2778          * forces an invalidation of the PFP L1 instruction cache.
2779          * Wait for the invalidation to complete.
2780          */
2781         for (i = 0; i < usec_timeout; i++) {
2782                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2783                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2784                         INVALIDATE_CACHE_COMPLETE))
2785                         break;
2786                 udelay(1);
2787         }
2788
2789         if (i >= usec_timeout) {
2790                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2791                 return -EINVAL;
2792         }
2793
2794         /* Prime the L1 instruction caches */
2795         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2796         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2797         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2798         /* Wait for the instruction cache to be primed */
2799         for (i = 0; i < usec_timeout; i++) {
2800                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2801                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2802                         ICACHE_PRIMED))
2803                         break;
2804                 udelay(1);
2805         }
2806
2807         if (i >= usec_timeout) {
2808                 dev_err(adev->dev, "failed to prime instruction cache\n");
2809                 return -EINVAL;
2810         }
2811
2812         mutex_lock(&adev->srbm_mutex);
2813         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2814                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2815                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2816                         (pfp_hdr->ucode_start_addr_hi << 30) |
2817                         (pfp_hdr->ucode_start_addr_lo >> 2));
2818                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2819                         pfp_hdr->ucode_start_addr_hi >> 2);
2820
2821                 /*
2822                  * Program CP_ME_CNTL to reset the given pipe so that
2823                  * CP_PFP_PRGRM_CNTR_START takes effect.
2824                  */
2825                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2826                 if (pipe_id == 0)
2827                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2828                                         PFP_PIPE0_RESET, 1);
2829                 else
2830                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2831                                         PFP_PIPE1_RESET, 1);
2832                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2833
2834                 /* Clear the pfp pipe reset bit. */
2835                 if (pipe_id == 0)
2836                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2837                                         PFP_PIPE0_RESET, 0);
2838                 else
2839                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2840                                         PFP_PIPE1_RESET, 0);
2841                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2842
2843                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2844                         lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2845                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2846                         upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2847         }
2848         soc21_grbm_select(adev, 0, 0, 0, 0);
2849         mutex_unlock(&adev->srbm_mutex);
2850
2851         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2852         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2853         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2854         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2855
2856         /* Invalidate the data caches */
2857         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2858         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2859         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2860
2861         for (i = 0; i < usec_timeout; i++) {
2862                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2863                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2864                         INVALIDATE_DCACHE_COMPLETE))
2865                         break;
2866                 udelay(1);
2867         }
2868
2869         if (i >= usec_timeout) {
2870                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2871                 return -EINVAL;
2872         }
2873
2874         return 0;
2875 }
2876
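/*
 * Legacy (non-RS64) ME load, mirroring the PFP path above but using the
 * CP_HYP_ME_UCODE_ADDR/DATA registers.
 */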
2877 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
2878 {
2879         int r;
2880         const struct gfx_firmware_header_v1_0 *me_hdr;
2881         const __le32 *fw_data;
2882         unsigned i, fw_size;
2883
2884         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2885                 adev->gfx.me_fw->data;
2886
2887         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2888
2889         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2890                 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2891         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
2892
2893         r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
2894                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2895                                       &adev->gfx.me.me_fw_obj,
2896                                       &adev->gfx.me.me_fw_gpu_addr,
2897                                       (void **)&adev->gfx.me.me_fw_ptr);
2898         if (r) {
2899                 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
2900                 gfx_v11_0_me_fini(adev);
2901                 return r;
2902         }
2903
2904         memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
2905
2906         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2907         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2908
2909         gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
2910
2911         WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
2912
2913         for (i = 0; i < me_hdr->jt_size; i++)
2914                 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
2915                              le32_to_cpup(fw_data + me_hdr->jt_offset + i));
2916
2917         WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
2918
2919         return 0;
2920 }
2921
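/*
 * RS64 ME load: same sequence as the RS64 PFP path, but using the
 * CP_ME_IC_* registers and the RS64 DC_BASE1 data-cache slot.
 */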
2922 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
2923 {
2924         int r;
2925         const struct gfx_firmware_header_v2_0 *me_hdr;
2926         const __le32 *fw_ucode, *fw_data;
2927         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2928         uint32_t tmp;
2929         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2930
2931         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2932                 adev->gfx.me_fw->data;
2933
2934         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2935
2936         /* instruction */
2937         fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
2938                 le32_to_cpu(me_hdr->ucode_offset_bytes));
2939         fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
2940         /* data */
2941         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2942                 le32_to_cpu(me_hdr->data_offset_bytes));
2943         fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
2944
2945         /* 64kb align */
2946         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2947                                       64 * 1024,
2948                                       AMDGPU_GEM_DOMAIN_VRAM |
2949                                       AMDGPU_GEM_DOMAIN_GTT,
2950                                       &adev->gfx.me.me_fw_obj,
2951                                       &adev->gfx.me.me_fw_gpu_addr,
2952                                       (void **)&adev->gfx.me.me_fw_ptr);
2953         if (r) {
2954                 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
2955                 gfx_v11_0_me_fini(adev);
2956                 return r;
2957         }
2958
2959         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2960                                       64 * 1024,
2961                                       AMDGPU_GEM_DOMAIN_VRAM |
2962                                       AMDGPU_GEM_DOMAIN_GTT,
2963                                       &adev->gfx.me.me_fw_data_obj,
2964                                       &adev->gfx.me.me_fw_data_gpu_addr,
2965                                       (void **)&adev->gfx.me.me_fw_data_ptr);
2966         if (r) {
2967                 dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
2968                 gfx_v11_0_me_fini(adev);
2969                 return r;
2970         }
2971
2972         memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
2973         memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
2974
2975         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2976         amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
2977         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2978         amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
2979
2980         if (amdgpu_emu_mode == 1)
2981                 adev->hdp.funcs->flush_hdp(adev, NULL);
2982
2983         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2984                 lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
2985         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2986                 upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
2987
2988         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2989         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2990         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2991         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2992         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2993
2994         /*
2995          * Programming any of the CP_ME_IC_BASE registers
2996          * forces an invalidation of the ME L1 instruction cache.
2997          * Wait for the invalidation to complete.
2998          */
2999         for (i = 0; i < usec_timeout; i++) {
3000                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3001                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3002                         INVALIDATE_CACHE_COMPLETE))
3003                         break;
3004                 udelay(1);
3005         }
3006
3007         if (i >= usec_timeout) {
3008                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
3009                 return -EINVAL;
3010         }
3011
3012         /* Prime the instruction caches */
3013         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3014         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
3015         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
3016
3017         /* Wait for the instruction cache to be primed */
3018         for (i = 0; i < usec_timeout; i++) {
3019                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3020                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3021                         ICACHE_PRIMED))
3022                         break;
3023                 udelay(1);
3024         }
3025
3026         if (i >= usec_timeout) {
3027                 dev_err(adev->dev, "failed to prime instruction cache\n");
3028                 return -EINVAL;
3029         }
3030
3031         mutex_lock(&adev->srbm_mutex);
3032         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3033                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3034                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
3035                         (me_hdr->ucode_start_addr_hi << 30) |
3036                         (me_hdr->ucode_start_addr_lo >> 2));
3037                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
3038                         me_hdr->ucode_start_addr_hi >> 2);
3039
3040                 /*
3041                  * Program CP_ME_CNTL to reset the given pipe so that
3042                  * CP_ME_PRGRM_CNTR_START takes effect.
3043                  */
3044                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3045                 if (pipe_id == 0)
3046                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3047                                         ME_PIPE0_RESET, 1);
3048                 else
3049                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3050                                         ME_PIPE1_RESET, 1);
3051                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3052
3053                 /* Clear the me pipe reset bit. */
3054                 if (pipe_id == 0)
3055                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3056                                         ME_PIPE0_RESET, 0);
3057                 else
3058                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3059                                         ME_PIPE1_RESET, 0);
3060                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3061
3062                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
3063                         lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3064                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
3065                         upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3066         }
3067         soc21_grbm_select(adev, 0, 0, 0, 0);
3068         mutex_unlock(&adev->srbm_mutex);
3069
3070         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3071         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3072         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3073         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3074
3075         /* Invalidate the data caches */
3076         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3077         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3078         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3079
3080         for (i = 0; i < usec_timeout; i++) {
3081                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3082                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3083                         INVALIDATE_DCACHE_COMPLETE))
3084                         break;
3085                 udelay(1);
3086         }
3087
3088         if (i >= usec_timeout) {
3089                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3090                 return -EINVAL;
3091         }
3092
3093         return 0;
3094 }
3095
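/*
 * Front end for gfx microcode loading: halt the gfx CP, then load PFP and ME
 * through either the RS64 or the legacy path depending on
 * adev->gfx.rs64_enable.
 */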
3096 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3097 {
3098         int r;
3099
3100         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3101                 return -EINVAL;
3102
3103         gfx_v11_0_cp_gfx_enable(adev, false);
3104
3105         if (adev->gfx.rs64_enable)
3106                 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3107         else
3108                 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3109         if (r) {
3110                 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3111                 return r;
3112         }
3113
3114         if (adev->gfx.rs64_enable)
3115                 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3116         else
3117                 r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3118         if (r) {
3119                 dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3120                 return r;
3121         }
3122
3123         return 0;
3124 }
3125
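/*
 * Initialize the CP and emit the clear-state (CSB) preamble on gfx ring 0;
 * when a second gfx ring exists, only a CLEAR_STATE packet is submitted on
 * it to copy state from context 0.
 */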
3126 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3127 {
3128         struct amdgpu_ring *ring;
3129         const struct cs_section_def *sect = NULL;
3130         const struct cs_extent_def *ext = NULL;
3131         int r, i;
3132         int ctx_reg_offset;
3133
3134         /* init the CP */
3135         WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3136                      adev->gfx.config.max_hw_contexts - 1);
3137         WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3138
3139         if (!amdgpu_async_gfx_ring)
3140                 gfx_v11_0_cp_gfx_enable(adev, true);
3141
3142         ring = &adev->gfx.gfx_ring[0];
3143         r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3144         if (r) {
3145                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3146                 return r;
3147         }
3148
3149         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3150         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3151
3152         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3153         amdgpu_ring_write(ring, 0x80000000);
3154         amdgpu_ring_write(ring, 0x80000000);
3155
3156         for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3157                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3158                         if (sect->id == SECT_CONTEXT) {
3159                                 amdgpu_ring_write(ring,
3160                                                   PACKET3(PACKET3_SET_CONTEXT_REG,
3161                                                           ext->reg_count));
3162                                 amdgpu_ring_write(ring, ext->reg_index -
3163                                                   PACKET3_SET_CONTEXT_REG_START);
3164                                 for (i = 0; i < ext->reg_count; i++)
3165                                         amdgpu_ring_write(ring, ext->extent[i]);
3166                         }
3167                 }
3168         }
3169
3170         ctx_reg_offset =
3171                 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3172         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3173         amdgpu_ring_write(ring, ctx_reg_offset);
3174         amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3175
3176         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3177         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3178
3179         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3180         amdgpu_ring_write(ring, 0);
3181
3182         amdgpu_ring_commit(ring);
3183
3184         /* submit cs packet to copy state 0 to next available state */
3185         if (adev->gfx.num_gfx_rings > 1) {
3186                 /* maximum supported gfx ring is 2 */
3187                 ring = &adev->gfx.gfx_ring[1];
3188                 r = amdgpu_ring_alloc(ring, 2);
3189                 if (r) {
3190                         DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3191                         return r;
3192                 }
3193
3194                 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3195                 amdgpu_ring_write(ring, 0);
3196
3197                 amdgpu_ring_commit(ring);
3198         }
3199         return 0;
3200 }
3201
3202 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3203                                          CP_PIPE_ID pipe)
3204 {
3205         u32 tmp;
3206
3207         tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3208         tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3209
3210         WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3211 }
3212
3213 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3214                                           struct amdgpu_ring *ring)
3215 {
3216         u32 tmp;
3217
3218         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3219         if (ring->use_doorbell) {
3220                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3221                                     DOORBELL_OFFSET, ring->doorbell_index);
3222                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3223                                     DOORBELL_EN, 1);
3224         } else {
3225                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3226                                     DOORBELL_EN, 0);
3227         }
3228         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3229
3230         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3231                             DOORBELL_RANGE_LOWER, ring->doorbell_index);
3232         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3233
3234         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3235                      CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3236 }
3237
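/*
 * Bring up the gfx ring buffers: program ring 0 on pipe 0 (and ring 1 on
 * pipe 1 when present) with buffer size, rptr/wptr addresses, base address
 * and doorbell, then start the CP via gfx_v11_0_cp_gfx_start().
 */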
3238 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3239 {
3240         struct amdgpu_ring *ring;
3241         u32 tmp;
3242         u32 rb_bufsz;
3243         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3244
3245         /* Set the write pointer delay */
3246         WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3247
3248         /* set the RB to use vmid 0 */
3249         WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3250
3251         /* Init gfx ring 0 for pipe 0 */
3252         mutex_lock(&adev->srbm_mutex);
3253         gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3254
3255         /* Set ring buffer size */
3256         ring = &adev->gfx.gfx_ring[0];
3257         rb_bufsz = order_base_2(ring->ring_size / 8);
3258         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3259         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3260         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3261
3262         /* Initialize the ring buffer's write pointers */
3263         ring->wptr = 0;
3264         WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3265         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3266
3267         /* set the wb address whether it's enabled or not */
3268         rptr_addr = ring->rptr_gpu_addr;
3269         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3270         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3271                      CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3272
3273         wptr_gpu_addr = ring->wptr_gpu_addr;
3274         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3275                      lower_32_bits(wptr_gpu_addr));
3276         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3277                      upper_32_bits(wptr_gpu_addr));
3278
3279         mdelay(1);
3280         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3281
3282         rb_addr = ring->gpu_addr >> 8;
3283         WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3284         WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3285
3286         WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3287
3288         gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3289         mutex_unlock(&adev->srbm_mutex);
3290
3291         /* Init gfx ring 1 for pipe 1 */
3292         if (adev->gfx.num_gfx_rings > 1) {
3293                 mutex_lock(&adev->srbm_mutex);
3294                 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3295                 /* maximum supported gfx ring is 2 */
3296                 ring = &adev->gfx.gfx_ring[1];
3297                 rb_bufsz = order_base_2(ring->ring_size / 8);
3298                 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3299                 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3300                 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3301                 /* Initialize the ring buffer's write pointers */
3302                 ring->wptr = 0;
3303                 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3304                 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3305                 /* Set the wb address whether it's enabled or not */
3306                 rptr_addr = ring->rptr_gpu_addr;
3307                 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3308                 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3309                              CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3310                 wptr_gpu_addr = ring->wptr_gpu_addr;
3311                 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3312                              lower_32_bits(wptr_gpu_addr));
3313                 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3314                              upper_32_bits(wptr_gpu_addr));
3315
3316                 mdelay(1);
3317                 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3318
3319                 rb_addr = ring->gpu_addr >> 8;
3320                 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3321                 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3322                 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3323
3324                 gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3325                 mutex_unlock(&adev->srbm_mutex);
3326         }
3327         /* Switch to pipe 0 */
3328         mutex_lock(&adev->srbm_mutex);
3329         gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3330         mutex_unlock(&adev->srbm_mutex);
3331
3332         /* start the ring */
3333         gfx_v11_0_cp_gfx_start(adev);
3334
3335         return 0;
3336 }
3337
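/*
 * Enable or halt the compute MEC.  For RS64 this toggles the per-pipe
 * active/reset bits and MEC_HALT in CP_MEC_RS64_CNTL; otherwise the
 * ME1/ME2 halt bits in CP_MEC_CNTL are used, and ME2 is only un-halted
 * when the MES KIQ is not in use.
 */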
3338 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3339 {
3340         u32 data;
3341
3342         if (adev->gfx.rs64_enable) {
3343                 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3344                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3345                                                          enable ? 0 : 1);
3346                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3347                                                          enable ? 0 : 1);
3348                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3349                                                          enable ? 0 : 1);
3350                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3351                                                          enable ? 0 : 1);
3352                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3353                                                          enable ? 0 : 1);
3354                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3355                                                          enable ? 1 : 0);
3356                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3357                                                          enable ? 1 : 0);
3358                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3359                                                          enable ? 1 : 0);
3360                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3361                                                          enable ? 1 : 0);
3362                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3363                                                          enable ? 0 : 1);
3364                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3365         } else {
3366                 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3367
3368                 if (enable) {
3369                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3370                         if (!adev->enable_mes_kiq)
3371                                 data = REG_SET_FIELD(data, CP_MEC_CNTL,
3372                                                      MEC_ME2_HALT, 0);
3373                 } else {
3374                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3375                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3376                 }
3377                 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3378         }
3379
3380         udelay(50);
3381 }
3382
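/*
 * Legacy (non-RS64) MEC load: copy the ucode into a GTT buffer, configure
 * the MEC cache and write the MEC1 jump table.
 */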
3383 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3384 {
3385         const struct gfx_firmware_header_v1_0 *mec_hdr;
3386         const __le32 *fw_data;
3387         unsigned i, fw_size;
3388         u32 *fw = NULL;
3389         int r;
3390
3391         if (!adev->gfx.mec_fw)
3392                 return -EINVAL;
3393
3394         gfx_v11_0_cp_compute_enable(adev, false);
3395
3396         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3397         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3398
3399         fw_data = (const __le32 *)
3400                 (adev->gfx.mec_fw->data +
3401                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3402         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3403
3404         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3405                                           PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3406                                           &adev->gfx.mec.mec_fw_obj,
3407                                           &adev->gfx.mec.mec_fw_gpu_addr,
3408                                           (void **)&fw);
3409         if (r) {
3410                 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3411                 gfx_v11_0_mec_fini(adev);
3412                 return r;
3413         }
3414
3415         memcpy(fw, fw_data, fw_size);
3416
3417         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3418         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3419
3420         gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3421
3422         /* MEC1 */
3423         WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3424
3425         for (i = 0; i < mec_hdr->jt_size; i++)
3426                 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3427                              le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3428
3429         WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3430
3431         return 0;
3432 }
3433
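/*
 * RS64 MEC load: separate 64KB-aligned instruction and data buffers are
 * created and programmed for every compute pipe, after which the MEC data
 * cache and the CPC instruction cache are invalidated.
 */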
3434 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3435 {
3436         const struct gfx_firmware_header_v2_0 *mec_hdr;
3437         const __le32 *fw_ucode, *fw_data;
3438         u32 tmp, fw_ucode_size, fw_data_size;
3439         u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3440         u32 *fw_ucode_ptr, *fw_data_ptr;
3441         int r;
3442
3443         if (!adev->gfx.mec_fw)
3444                 return -EINVAL;
3445
3446         gfx_v11_0_cp_compute_enable(adev, false);
3447
3448         mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3449         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3450
3451         fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3452                                 le32_to_cpu(mec_hdr->ucode_offset_bytes));
3453         fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3454
3455         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3456                                 le32_to_cpu(mec_hdr->data_offset_bytes));
3457         fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3458
3459         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3460                                       64 * 1024,
3461                                       AMDGPU_GEM_DOMAIN_VRAM |
3462                                       AMDGPU_GEM_DOMAIN_GTT,
3463                                       &adev->gfx.mec.mec_fw_obj,
3464                                       &adev->gfx.mec.mec_fw_gpu_addr,
3465                                       (void **)&fw_ucode_ptr);
3466         if (r) {
3467                 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3468                 gfx_v11_0_mec_fini(adev);
3469                 return r;
3470         }
3471
3472         r = amdgpu_bo_create_reserved(adev, fw_data_size,
3473                                       64 * 1024,
3474                                       AMDGPU_GEM_DOMAIN_VRAM |
3475                                       AMDGPU_GEM_DOMAIN_GTT,
3476                                       &adev->gfx.mec.mec_fw_data_obj,
3477                                       &adev->gfx.mec.mec_fw_data_gpu_addr,
3478                                       (void **)&fw_data_ptr);
3479         if (r) {
3480                 dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3481                 gfx_v11_0_mec_fini(adev);
3482                 return r;
3483         }
3484
3485         memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3486         memcpy(fw_data_ptr, fw_data, fw_data_size);
3487
3488         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3489         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3490         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3491         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3492
3493         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3494         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3495         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3496         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3497         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
3498
3499         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
3500         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
3501         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
3502         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
3503
3504         mutex_lock(&adev->srbm_mutex);
3505         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
3506                 soc21_grbm_select(adev, 1, i, 0, 0);
3507
3508                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
3509                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
3510                      upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3511
3512                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3513                                         mec_hdr->ucode_start_addr_lo >> 2 |
3514                                         mec_hdr->ucode_start_addr_hi << 30);
3515                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3516                                         mec_hdr->ucode_start_addr_hi >> 2);
3517
3518                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
3519                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
3520                      upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3521         }
3522         mutex_unlock(&adev->srbm_mutex);
3523         soc21_grbm_select(adev, 0, 0, 0, 0);
3524
3525         /* Trigger an invalidation of the MEC L1 data cache */
3526         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3527         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3528         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
3529
3530         /* Wait for invalidation complete */
3531         for (i = 0; i < usec_timeout; i++) {
3532                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3533                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
3534                                        INVALIDATE_DCACHE_COMPLETE))
3535                         break;
3536                 udelay(1);
3537         }
3538
3539         if (i >= usec_timeout) {
3540                 dev_err(adev->dev, "failed to invalidate MEC data cache\n");
3541                 return -EINVAL;
3542         }
3543
3544         /* Trigger an invalidation of the L1 instruction caches */
3545         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3546         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
3547         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
3548
3549         /* Wait for invalidation complete */
3550         for (i = 0; i < usec_timeout; i++) {
3551                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3552                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
3553                                        INVALIDATE_CACHE_COMPLETE))
3554                         break;
3555                 udelay(1);
3556         }
3557
3558         if (i >= usec_timeout) {
3559                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
3560                 return -EINVAL;
3561         }
3562
3563         return 0;
3564 }
3565
3566 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
3567 {
3568         uint32_t tmp;
3569         struct amdgpu_device *adev = ring->adev;
3570
3571         /* tell RLC which is KIQ queue */
3572         tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3573         tmp &= 0xffffff00;
3574         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3575         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3576         tmp |= 0x80;
3577         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3578 }
3579
3580 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
3581 {
3582         /* set graphics engine doorbell range */
3583         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
3584                      (adev->doorbell_index.gfx_ring0 * 2) << 2);
3585         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3586                      (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
3587
3588         /* set compute engine doorbell range */
3589         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3590                      (adev->doorbell_index.kiq * 2) << 2);
3591         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3592                      (adev->doorbell_index.userqueue_end * 2) << 2);
3593 }
3594
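/*
 * Populate a v11 gfx MQD from the generic mqd properties: MQD base address,
 * HQD (ring buffer) base and size, rptr/wptr poll addresses, doorbell
 * control, and the queue priority and quantum settings.
 */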
3595 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
3596                                   struct amdgpu_mqd_prop *prop)
3597 {
3598         struct v11_gfx_mqd *mqd = m;
3599         uint64_t hqd_gpu_addr, wb_gpu_addr;
3600         uint32_t tmp;
3601         uint32_t rb_bufsz;
3602
3603         /* set up gfx hqd wptr */
3604         mqd->cp_gfx_hqd_wptr = 0;
3605         mqd->cp_gfx_hqd_wptr_hi = 0;
3606
3607         /* set the pointer to the MQD */
3608         mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
3609         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3610
3611         /* set up mqd control */
3612         tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
3613         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
3614         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
3615         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
3616         mqd->cp_gfx_mqd_control = tmp;
3617
3618         /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
3619         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
3620         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
3621         mqd->cp_gfx_hqd_vmid = 0;
3622
3623         /* set up default queue priority level
3624          * 0x0 = low priority, 0x1 = high priority */
3625         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
3626         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
3627         mqd->cp_gfx_hqd_queue_priority = tmp;
3628
3629         /* set up time quantum */
3630         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
3631         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
3632         mqd->cp_gfx_hqd_quantum = tmp;
3633
3634         /* set up gfx hqd base. this is similar to CP_RB_BASE */
3635         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3636         mqd->cp_gfx_hqd_base = hqd_gpu_addr;
3637         mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
3638
3639         /* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
3640         wb_gpu_addr = prop->rptr_gpu_addr;
3641         mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
3642         mqd->cp_gfx_hqd_rptr_addr_hi =
3643                 upper_32_bits(wb_gpu_addr) & 0xffff;
3644
3645         /* set up rb_wptr_poll addr */
3646         wb_gpu_addr = prop->wptr_gpu_addr;
3647         mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3648         mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3649
3650         /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
3651         rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
3652         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
3653         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
3654         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
3655 #ifdef __BIG_ENDIAN
3656         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
3657 #endif
3658         mqd->cp_gfx_hqd_cntl = tmp;
3659
3660         /* set up cp_doorbell_control */
3661         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3662         if (prop->use_doorbell) {
3663                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3664                                     DOORBELL_OFFSET, prop->doorbell_index);
3665                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3666                                     DOORBELL_EN, 1);
3667         } else
3668                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3669                                     DOORBELL_EN, 0);
3670         mqd->cp_rb_doorbell_control = tmp;
3671
3672         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3673         mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
3674
3675         /* activate the queue */
3676         mqd->cp_gfx_hqd_active = 1;
3677
3678         return 0;
3679 }
3680
3681 static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
3682 {
3683         struct amdgpu_device *adev = ring->adev;
3684         struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3685         int mqd_idx = ring - &adev->gfx.gfx_ring[0];
3686
3687         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3688                 memset((void *)mqd, 0, sizeof(*mqd));
3689                 mutex_lock(&adev->srbm_mutex);
3690                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3691                 amdgpu_ring_init_mqd(ring);
3692                 soc21_grbm_select(adev, 0, 0, 0, 0);
3693                 mutex_unlock(&adev->srbm_mutex);
3694                 if (adev->gfx.me.mqd_backup[mqd_idx])
3695                         memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3696         } else {
3697                 /* restore mqd with the backup copy */
3698                 if (adev->gfx.me.mqd_backup[mqd_idx])
3699                         memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
3700                 /* reset the ring */
3701                 ring->wptr = 0;
3702                 *ring->wptr_cpu_addr = 0;
3703                 amdgpu_ring_clear_ring(ring);
3704         }
3705
3706         return 0;
3707 }
3708
3709 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
3710 {
3711         int r, i;
3712         struct amdgpu_ring *ring;
3713
3714         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3715                 ring = &adev->gfx.gfx_ring[i];
3716
3717                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3718                 if (unlikely(r != 0))
3719                         return r;
3720
3721                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3722                 if (!r) {
3723                         r = gfx_v11_0_gfx_init_queue(ring);
3724                         amdgpu_bo_kunmap(ring->mqd_obj);
3725                         ring->mqd_ptr = NULL;
3726                 }
3727                 amdgpu_bo_unreserve(ring->mqd_obj);
3728                 if (r)
3729                         return r;
3730         }
3731
3732         r = amdgpu_gfx_enable_kgq(adev, 0);
3733         if (r)
3734                 return r;
3735
3736         return gfx_v11_0_cp_gfx_start(adev);
3737 }
3738
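/* Fill a v11 compute MQD from the generic MQD properties: EOP buffer
 * address and size, doorbell control, MQD and HQD base addresses, queue
 * control, rptr/wptr report addresses, pipe/queue priority and the
 * active flag.
 */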
3739 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
3740                                       struct amdgpu_mqd_prop *prop)
3741 {
3742         struct v11_compute_mqd *mqd = m;
3743         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3744         uint32_t tmp;
3745
3746         mqd->header = 0xC0310800;
3747         mqd->compute_pipelinestat_enable = 0x00000001;
3748         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3749         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3750         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3751         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3752         mqd->compute_misc_reserved = 0x00000007;
3753
3754         eop_base_addr = prop->eop_gpu_addr >> 8;
3755         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3756         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3757
3758         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3759         tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
3760         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3761                         (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
3762
3763         mqd->cp_hqd_eop_control = tmp;
3764
3765         /* enable doorbell? */
3766         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3767
3768         if (prop->use_doorbell) {
3769                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3770                                     DOORBELL_OFFSET, prop->doorbell_index);
3771                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3772                                     DOORBELL_EN, 1);
3773                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3774                                     DOORBELL_SOURCE, 0);
3775                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3776                                     DOORBELL_HIT, 0);
3777         } else {
3778                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3779                                     DOORBELL_EN, 0);
3780         }
3781
3782         mqd->cp_hqd_pq_doorbell_control = tmp;
3783
3784         /* disable the queue if it's active */
3785         mqd->cp_hqd_dequeue_request = 0;
3786         mqd->cp_hqd_pq_rptr = 0;
3787         mqd->cp_hqd_pq_wptr_lo = 0;
3788         mqd->cp_hqd_pq_wptr_hi = 0;
3789
3790         /* set the pointer to the MQD */
3791         mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
3792         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3793
3794         /* set MQD vmid to 0 */
3795         tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
3796         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3797         mqd->cp_mqd_control = tmp;
3798
3799         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3800         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3801         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3802         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3803
3804         /* set up the HQD, this is similar to CP_RB0_CNTL */
3805         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
3806         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3807                             (order_base_2(prop->queue_size / 4) - 1));
3808         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3809                             (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3810         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3811         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
3812         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3813         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3814         mqd->cp_hqd_pq_control = tmp;
3815
3816         /* set the wb address whether it's enabled or not */
3817         wb_gpu_addr = prop->rptr_gpu_addr;
3818         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3819         mqd->cp_hqd_pq_rptr_report_addr_hi =
3820                 upper_32_bits(wb_gpu_addr) & 0xffff;
3821
3822         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3823         wb_gpu_addr = prop->wptr_gpu_addr;
3824         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3825         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3826
3827         tmp = 0;
3828         /* enable the doorbell if requested */
3829         if (prop->use_doorbell) {
3830                 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3831                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3832                                 DOORBELL_OFFSET, prop->doorbell_index);
3833
3834                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3835                                     DOORBELL_EN, 1);
3836                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3837                                     DOORBELL_SOURCE, 0);
3838                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3839                                     DOORBELL_HIT, 0);
3840         }
3841
3842         mqd->cp_hqd_pq_doorbell_control = tmp;
3843
3844         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3845         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
3846
3847         /* set the vmid for the queue */
3848         mqd->cp_hqd_vmid = 0;
3849
3850         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
3851         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
3852         mqd->cp_hqd_persistent_state = tmp;
3853
3854         /* set MIN_IB_AVAIL_SIZE */
3855         tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
3856         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3857         mqd->cp_hqd_ib_control = tmp;
3858
3859         /* set static priority for a compute queue/ring */
3860         mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
3861         mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
3862
3863         mqd->cp_hqd_active = prop->hqd_active;
3864
3865         return 0;
3866 }
3867
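/* Program the KIQ HQD registers directly from its MQD (the KIQ queue
 * cannot be brought up through itself).  The caller selects the queue via
 * soc21_grbm_select() under srbm_mutex before calling this.
 */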
3868 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
3869 {
3870         struct amdgpu_device *adev = ring->adev;
3871         struct v11_compute_mqd *mqd = ring->mqd_ptr;
3872         int j;
3873
3874         /* deactivate the queue */
3875         if (amdgpu_sriov_vf(adev))
3876                 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
3877
3878         /* disable wptr polling */
3879         WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3880
3881         /* write the EOP addr */
3882         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
3883                mqd->cp_hqd_eop_base_addr_lo);
3884         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
3885                mqd->cp_hqd_eop_base_addr_hi);
3886
3887         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3888         WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
3889                mqd->cp_hqd_eop_control);
3890
3891         /* enable doorbell? */
3892         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3893                mqd->cp_hqd_pq_doorbell_control);
3894
3895         /* disable the queue if it's active */
3896         if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
3897                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
3898                 for (j = 0; j < adev->usec_timeout; j++) {
3899                         if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
3900                                 break;
3901                         udelay(1);
3902                 }
3903                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
3904                        mqd->cp_hqd_dequeue_request);
3905                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
3906                        mqd->cp_hqd_pq_rptr);
3907                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3908                        mqd->cp_hqd_pq_wptr_lo);
3909                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3910                        mqd->cp_hqd_pq_wptr_hi);
3911         }
3912
3913         /* set the pointer to the MQD */
3914         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
3915                mqd->cp_mqd_base_addr_lo);
3916         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
3917                mqd->cp_mqd_base_addr_hi);
3918
3919         /* set MQD vmid to 0 */
3920         WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
3921                mqd->cp_mqd_control);
3922
3923         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3924         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
3925                mqd->cp_hqd_pq_base_lo);
3926         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
3927                mqd->cp_hqd_pq_base_hi);
3928
3929         /* set up the HQD, this is similar to CP_RB0_CNTL */
3930         WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
3931                mqd->cp_hqd_pq_control);
3932
3933         /* set the wb address whether it's enabled or not */
3934         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
3935                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3936         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3937                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3938
3939         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3940         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
3941                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3942         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3943                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3944
3945         /* enable the doorbell if requested */
3946         if (ring->use_doorbell) {
3947                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3948                         (adev->doorbell_index.kiq * 2) << 2);
3949                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3950                         (adev->doorbell_index.userqueue_end * 2) << 2);
3951         }
3952
3953         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3954                mqd->cp_hqd_pq_doorbell_control);
3955
3956         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3957         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3958                mqd->cp_hqd_pq_wptr_lo);
3959         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3960                mqd->cp_hqd_pq_wptr_hi);
3961
3962         /* set the vmid for the queue */
3963         WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
3964
3965         WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
3966                mqd->cp_hqd_persistent_state);
3967
3968         /* activate the queue */
3969         WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
3970                mqd->cp_hqd_active);
3971
3972         if (ring->use_doorbell)
3973                 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3974
3975         return 0;
3976 }
3977
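/* Initialize the KIQ ring.  On GPU reset the MQD is restored from the
 * backup copy and only the HQD registers are reprogrammed; otherwise the
 * MQD is generated from scratch, written to the registers and backed up.
 */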
3978 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
3979 {
3980         struct amdgpu_device *adev = ring->adev;
3981         struct v11_compute_mqd *mqd = ring->mqd_ptr;
3982
3983         gfx_v11_0_kiq_setting(ring);
3984
3985         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3986                 /* reset MQD to a clean status */
3987                 if (adev->gfx.kiq[0].mqd_backup)
3988                         memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
3989
3990                 /* reset ring buffer */
3991                 ring->wptr = 0;
3992                 amdgpu_ring_clear_ring(ring);
3993
3994                 mutex_lock(&adev->srbm_mutex);
3995                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3996                 gfx_v11_0_kiq_init_register(ring);
3997                 soc21_grbm_select(adev, 0, 0, 0, 0);
3998                 mutex_unlock(&adev->srbm_mutex);
3999         } else {
4000                 memset((void *)mqd, 0, sizeof(*mqd));
4001                 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4002                         amdgpu_ring_clear_ring(ring);
4003                 mutex_lock(&adev->srbm_mutex);
4004                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4005                 amdgpu_ring_init_mqd(ring);
4006                 gfx_v11_0_kiq_init_register(ring);
4007                 soc21_grbm_select(adev, 0, 0, 0, 0);
4008                 mutex_unlock(&adev->srbm_mutex);
4009
4010                 if (adev->gfx.kiq[0].mqd_backup)
4011                         memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
4012         }
4013
4014         return 0;
4015 }
4016
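/* Initialize a kernel compute queue MQD, mirroring the gfx path: generate
 * and back up the MQD on first init, restore it and reset the ring on
 * reset/resume.  The queue is mapped on hardware later through the KIQ.
 */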
4017 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
4018 {
4019         struct amdgpu_device *adev = ring->adev;
4020         struct v11_compute_mqd *mqd = ring->mqd_ptr;
4021         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4022
4023         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4024                 memset((void *)mqd, 0, sizeof(*mqd));
4025                 mutex_lock(&adev->srbm_mutex);
4026                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4027                 amdgpu_ring_init_mqd(ring);
4028                 soc21_grbm_select(adev, 0, 0, 0, 0);
4029                 mutex_unlock(&adev->srbm_mutex);
4030
4031                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4032                         memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4033         } else {
4034                 /* restore MQD to a clean status */
4035                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4036                         memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4037                 /* reset ring buffer */
4038                 ring->wptr = 0;
4039                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4040                 amdgpu_ring_clear_ring(ring);
4041         }
4042
4043         return 0;
4044 }
4045
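/* Bring up the KIQ ring: map its MQD buffer, initialize the queue and
 * mark the ring's scheduler as ready.
 */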
4046 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4047 {
4048         struct amdgpu_ring *ring;
4049         int r;
4050
4051         ring = &adev->gfx.kiq[0].ring;
4052
4053         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4054         if (unlikely(r != 0))
4055                 return r;
4056
4057         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4058         if (unlikely(r != 0)) {
4059                 amdgpu_bo_unreserve(ring->mqd_obj);
4060                 return r;
4061         }
4062
4063         gfx_v11_0_kiq_init_queue(ring);
4064         amdgpu_bo_kunmap(ring->mqd_obj);
4065         ring->mqd_ptr = NULL;
4066         amdgpu_bo_unreserve(ring->mqd_obj);
4067         ring->sched.ready = true;
4068         return 0;
4069 }
4070
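/* Bring up all kernel compute rings: initialize each ring's MQD, then map
 * the queues on the hardware through the KIQ.
 */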
4071 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4072 {
4073         struct amdgpu_ring *ring = NULL;
4074         int r = 0, i;
4075
4076         if (!amdgpu_async_gfx_ring)
4077                 gfx_v11_0_cp_compute_enable(adev, true);
4078
4079         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4080                 ring = &adev->gfx.compute_ring[i];
4081
4082                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4083                 if (unlikely(r != 0))
4084                         goto done;
4085                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4086                 if (!r) {
4087                         r = gfx_v11_0_kcq_init_queue(ring);
4088                         amdgpu_bo_kunmap(ring->mqd_obj);
4089                         ring->mqd_ptr = NULL;
4090                 }
4091                 amdgpu_bo_unreserve(ring->mqd_obj);
4092                 if (r)
4093                         goto done;
4094         }
4095
4096         r = amdgpu_gfx_enable_kcq(adev, 0);
4097 done:
4098         return r;
4099 }
4100
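/* Resume the whole CP block: load CP microcode for the direct load type,
 * set the doorbell range, bring up the KIQ (or MES KIQ), the compute
 * queues and the gfx queues, then ring-test every gfx and compute ring.
 */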
4101 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4102 {
4103         int r, i;
4104         struct amdgpu_ring *ring;
4105
4106         if (!(adev->flags & AMD_IS_APU))
4107                 gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4108
4109         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4110                 /* legacy firmware loading */
4111                 r = gfx_v11_0_cp_gfx_load_microcode(adev);
4112                 if (r)
4113                         return r;
4114
4115                 if (adev->gfx.rs64_enable)
4116                         r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4117                 else
4118                         r = gfx_v11_0_cp_compute_load_microcode(adev);
4119                 if (r)
4120                         return r;
4121         }
4122
4123         gfx_v11_0_cp_set_doorbell_range(adev);
4124
4125         if (amdgpu_async_gfx_ring) {
4126                 gfx_v11_0_cp_compute_enable(adev, true);
4127                 gfx_v11_0_cp_gfx_enable(adev, true);
4128         }
4129
4130         if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4131                 r = amdgpu_mes_kiq_hw_init(adev);
4132         else
4133                 r = gfx_v11_0_kiq_resume(adev);
4134         if (r)
4135                 return r;
4136
4137         r = gfx_v11_0_kcq_resume(adev);
4138         if (r)
4139                 return r;
4140
4141         if (!amdgpu_async_gfx_ring) {
4142                 r = gfx_v11_0_cp_gfx_resume(adev);
4143                 if (r)
4144                         return r;
4145         } else {
4146                 r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4147                 if (r)
4148                         return r;
4149         }
4150
4151         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4152                 ring = &adev->gfx.gfx_ring[i];
4153                 r = amdgpu_ring_test_helper(ring);
4154                 if (r)
4155                         return r;
4156         }
4157
4158         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4159                 ring = &adev->gfx.compute_ring[i];
4160                 r = amdgpu_ring_test_helper(ring);
4161                 if (r)
4162                         return r;
4163         }
4164
4165         return 0;
4166 }
4167
4168 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4169 {
4170         gfx_v11_0_cp_gfx_enable(adev, enable);
4171         gfx_v11_0_cp_compute_enable(adev, enable);
4172 }
4173
4174 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4175 {
4176         int r;
4177         bool value;
4178
4179         r = adev->gfxhub.funcs->gart_enable(adev);
4180         if (r)
4181                 return r;
4182
4183         adev->hdp.funcs->flush_hdp(adev, NULL);
4184
4185         value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
4186                 false : true;
4187
4188         adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4189         amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
4190
4191         return 0;
4192 }
4193
4194 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4195 {
4196         u32 tmp;
4197
4198         /* select RS64 */
4199         if (adev->gfx.rs64_enable) {
4200                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4201                 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4202                 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4203
4204                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4205                 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4206                 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4207         }
4208
4209         if (amdgpu_emu_mode == 1)
4210                 msleep(100);
4211 }
4212
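/* Read GB_ADDR_CONFIG and decode its fields (pipes, packers, shader
 * engines, RBs per SE, compressed fragments, pipe interleave size) into
 * adev->gfx.config.  Returns -EINVAL if the register reads back as 0.
 */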
4213 static int get_gb_addr_config(struct amdgpu_device *adev)
4214 {
4215         u32 gb_addr_config;
4216
4217         gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4218         if (gb_addr_config == 0)
4219                 return -EINVAL;
4220
4221         adev->gfx.config.gb_addr_config_fields.num_pkrs =
4222                 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4223
4224         adev->gfx.config.gb_addr_config = gb_addr_config;
4225
4226         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4227                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4228                                       GB_ADDR_CONFIG, NUM_PIPES);
4229
4230         adev->gfx.config.max_tile_pipes =
4231                 adev->gfx.config.gb_addr_config_fields.num_pipes;
4232
4233         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4234                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4235                                       GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4236         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4237                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4238                                       GB_ADDR_CONFIG, NUM_RB_PER_SE);
4239         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4240                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4241                                       GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4242         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4243                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4244                                       GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4245
4246         return 0;
4247 }
4248
4249 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4250 {
4251         uint32_t data;
4252
4253         data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4254         data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4255         WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4256
4257         data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4258         data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4259         WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4260 }
4261
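/* gfx hw_init: load firmware according to the configured load type (RLC
 * backdoor autoload, PSP or direct), enable the gfxhub, program golden
 * registers, resume the RLC and finally the CP.
 */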
4262 static int gfx_v11_0_hw_init(void *handle)
4263 {
4264         int r;
4265         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4266
4267         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4268                 if (adev->gfx.imu.funcs) {
4269                         /* RLC autoload sequence 1: Program rlc ram */
4270                         if (adev->gfx.imu.funcs->program_rlc_ram)
4271                                 adev->gfx.imu.funcs->program_rlc_ram(adev);
4272                 }
4273                 /* rlc autoload firmware */
4274                 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4275                 if (r)
4276                         return r;
4277         } else {
4278                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4279                         if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4280                                 if (adev->gfx.imu.funcs->load_microcode)
4281                                         adev->gfx.imu.funcs->load_microcode(adev);
4282                                 if (adev->gfx.imu.funcs->setup_imu)
4283                                         adev->gfx.imu.funcs->setup_imu(adev);
4284                                 if (adev->gfx.imu.funcs->start_imu)
4285                                         adev->gfx.imu.funcs->start_imu(adev);
4286                         }
4287
4288                         /* disable gpa mode in backdoor loading */
4289                         gfx_v11_0_disable_gpa_mode(adev);
4290                 }
4291         }
4292
4293         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4294             (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4295                 r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4296                 if (r) {
4297                         dev_err(adev->dev, "(%d) failed to wait for rlc autoload to complete\n", r);
4298                         return r;
4299                 }
4300         }
4301
4302         adev->gfx.is_poweron = true;
4303
4304         if (get_gb_addr_config(adev))
4305                 DRM_WARN("Invalid gb_addr_config!\n");
4306
4307         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4308             adev->gfx.rs64_enable)
4309                 gfx_v11_0_config_gfx_rs64(adev);
4310
4311         r = gfx_v11_0_gfxhub_enable(adev);
4312         if (r)
4313                 return r;
4314
4315         if (!amdgpu_emu_mode)
4316                 gfx_v11_0_init_golden_registers(adev);
4317
4318         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4319             (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4320                 /*
4321                  * For gfx 11, RLC firmware loading depends on the SMU firmware
4322                  * being loaded first, so for the direct load type the SMC ucode
4323                  * has to be loaded here before the RLC.
4324                  */
4325                 if (!(adev->flags & AMD_IS_APU)) {
4326                         r = amdgpu_pm_load_smu_firmware(adev, NULL);
4327                         if (r)
4328                                 return r;
4329                 }
4330         }
4331
4332         gfx_v11_0_constants_init(adev);
4333
4334         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4335                 gfx_v11_0_select_cp_fw_arch(adev);
4336
4337         if (adev->nbio.funcs->gc_doorbell_init)
4338                 adev->nbio.funcs->gc_doorbell_init(adev);
4339
4340         r = gfx_v11_0_rlc_resume(adev);
4341         if (r)
4342                 return r;
4343
4344         /*
4345          * Golden register init and RLC resume may override some registers,
4346          * so reconfigure them here.
4347          */
4348         gfx_v11_0_tcp_harvest(adev);
4349
4350         r = gfx_v11_0_cp_resume(adev);
4351         if (r)
4352                 return r;
4353
4354         return r;
4355 }
4356
4357 static int gfx_v11_0_hw_fini(void *handle)
4358 {
4359         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4360
4361         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4362         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4363
4364         if (!adev->no_hw_access) {
4365                 if (amdgpu_async_gfx_ring) {
4366                         if (amdgpu_gfx_disable_kgq(adev, 0))
4367                                 DRM_ERROR("KGQ disable failed\n");
4368                 }
4369
4370                 if (amdgpu_gfx_disable_kcq(adev, 0))
4371                         DRM_ERROR("KCQ disable failed\n");
4372
4373                 amdgpu_mes_kiq_hw_fini(adev);
4374         }
4375
4376         if (amdgpu_sriov_vf(adev))
4377                 /* Skip the steps that disable CPG and clear the KIQ position,
4378                  * so that the CP can perform IDLE-SAVE during the switch. Those
4379                  * steps are needed to avoid a DMAR error on gfx9, but that error
4380                  * is not reproduced on gfx11.
4381                  */
4382                 return 0;
4383
4384         gfx_v11_0_cp_enable(adev, false);
4385         gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4386
4387         adev->gfxhub.funcs->gart_disable(adev);
4388
4389         adev->gfx.is_poweron = false;
4390
4391         return 0;
4392 }
4393
4394 static int gfx_v11_0_suspend(void *handle)
4395 {
4396         return gfx_v11_0_hw_fini(handle);
4397 }
4398
4399 static int gfx_v11_0_resume(void *handle)
4400 {
4401         return gfx_v11_0_hw_init(handle);
4402 }
4403
4404 static bool gfx_v11_0_is_idle(void *handle)
4405 {
4406         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4407
4408         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
4409                                 GRBM_STATUS, GUI_ACTIVE))
4410                 return false;
4411         else
4412                 return true;
4413 }
4414
4415 static int gfx_v11_0_wait_for_idle(void *handle)
4416 {
4417         unsigned i;
4418         u32 tmp;
4419         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4420
4421         for (i = 0; i < adev->usec_timeout; i++) {
4422                 /* read GRBM_STATUS */
4423                 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
4424                         GRBM_STATUS__GUI_ACTIVE_MASK;
4425
4426                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4427                         return 0;
4428                 udelay(1);
4429         }
4430         return -ETIMEDOUT;
4431 }
4432
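/* Soft reset the gfx block: mask CP interrupts, request dequeue on every
 * compute and gfx queue, reset all VMIDs, pulse the CP/GFX/CPF/CPC/CPG
 * soft-reset bits, then restore interrupts and resume the CP.
 */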
4433 static int gfx_v11_0_soft_reset(void *handle)
4434 {
4435         u32 grbm_soft_reset = 0;
4436         u32 tmp;
4437         int i, j, k;
4438         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4439
4440         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4441         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
4442         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
4443         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
4444         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
4445         WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4446
4447         gfx_v11_0_set_safe_mode(adev, 0);
4448
4449         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4450                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4451                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4452                                 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4453                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4454                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4455                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4456                                 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4457
4458                                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
4459                                 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
4460                         }
4461                 }
4462         }
4463         for (i = 0; i < adev->gfx.me.num_me; ++i) {
4464                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
4465                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
4466                                 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4467                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4468                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4469                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4470                                 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4471
4472                                 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
4473                         }
4474                 }
4475         }
4476
4477         WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
4478
4479         /* Read the CP_VMID_RESET register three times
4480          * to give GFX_HQD_ACTIVE sufficient time to reach 0. */
4481         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4482         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4483         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4484
4485         for (i = 0; i < adev->usec_timeout; i++) {
4486                 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
4487                     !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
4488                         break;
4489                 udelay(1);
4490         }
4491         if (i >= adev->usec_timeout) {
4492                 printk("Failed to wait for all pipes to become clean\n");
4493                 return -EINVAL;
4494         }
4495
4496         /**********  trigger soft reset  ***********/
4497         grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4498         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4499                                         SOFT_RESET_CP, 1);
4500         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4501                                         SOFT_RESET_GFX, 1);
4502         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4503                                         SOFT_RESET_CPF, 1);
4504         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4505                                         SOFT_RESET_CPC, 1);
4506         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4507                                         SOFT_RESET_CPG, 1);
4508         WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4509         /**********  exit soft reset  ***********/
4510         grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4511         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4512                                         SOFT_RESET_CP, 0);
4513         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4514                                         SOFT_RESET_GFX, 0);
4515         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4516                                         SOFT_RESET_CPF, 0);
4517         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4518                                         SOFT_RESET_CPC, 0);
4519         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4520                                         SOFT_RESET_CPG, 0);
4521         WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4522
4523         tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
4524         tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
4525         WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
4526
4527         WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
4528         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
4529
4530         for (i = 0; i < adev->usec_timeout; i++) {
4531                 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
4532                         break;
4533                 udelay(1);
4534         }
4535         if (i >= adev->usec_timeout) {
4536                 printk("Failed to wait for CP_VMID_RESET to return to 0\n");
4537                 return -EINVAL;
4538         }
4539
4540         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4541         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4542         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4543         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4544         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4545         WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4546
4547         gfx_v11_0_unset_safe_mode(adev, 0);
4548
4549         return gfx_v11_0_cp_resume(adev);
4550 }
4551
4552 static bool gfx_v11_0_check_soft_reset(void *handle)
4553 {
4554         int i, r;
4555         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4556         struct amdgpu_ring *ring;
4557         long tmo = msecs_to_jiffies(1000);
4558
4559         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4560                 ring = &adev->gfx.gfx_ring[i];
4561                 r = amdgpu_ring_test_ib(ring, tmo);
4562                 if (r)
4563                         return true;
4564         }
4565
4566         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4567                 ring = &adev->gfx.compute_ring[i];
4568                 r = amdgpu_ring_test_ib(ring, tmo);
4569                 if (r)
4570                         return true;
4571         }
4572
4573         return false;
4574 }
4575
4576 static int gfx_v11_0_post_soft_reset(void *handle)
4577 {
4578         /*
4579          * GFX soft reset impacts MES, so MES must be resumed after a GFX soft reset.
4580          */
4581         return amdgpu_mes_resume((struct amdgpu_device *)handle);
4582 }
4583
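/* Sample the 64-bit GPU clock counter.  The upper half is read before and
 * after the lower half so a carry between the two reads can be detected
 * and the lower half re-read.
 */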
4584 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4585 {
4586         uint64_t clock;
4587         uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
4588
4589         if (amdgpu_sriov_vf(adev)) {
4590                 amdgpu_gfx_off_ctrl(adev, false);
4591                 mutex_lock(&adev->gfx.gpu_clock_mutex);
4592                 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
4593                 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
4594                 clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
4595                 if (clock_counter_hi_pre != clock_counter_hi_after)
4596                         clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
4597                 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4598                 amdgpu_gfx_off_ctrl(adev, true);
4599         } else {
4600                 preempt_disable();
4601                 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
4602                 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
4603                 clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
4604                 if (clock_counter_hi_pre != clock_counter_hi_after)
4605                         clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
4606                 preempt_enable();
4607         }
4608         clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
4609
4610         return clock;
4611 }
4612
4613 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4614                                            uint32_t vmid,
4615                                            uint32_t gds_base, uint32_t gds_size,
4616                                            uint32_t gws_base, uint32_t gws_size,
4617                                            uint32_t oa_base, uint32_t oa_size)
4618 {
4619         struct amdgpu_device *adev = ring->adev;
4620
4621         /* GDS Base */
4622         gfx_v11_0_write_data_to_reg(ring, 0, false,
4623                                     SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
4624                                     gds_base);
4625
4626         /* GDS Size */
4627         gfx_v11_0_write_data_to_reg(ring, 0, false,
4628                                     SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
4629                                     gds_size);
4630
4631         /* GWS */
4632         gfx_v11_0_write_data_to_reg(ring, 0, false,
4633                                     SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
4634                                     gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4635
4636         /* OA */
4637         gfx_v11_0_write_data_to_reg(ring, 0, false,
4638                                     SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
4639                                     (1 << (oa_size + oa_base)) - (1 << oa_base));
4640 }
4641
4642 static int gfx_v11_0_early_init(void *handle)
4643 {
4644         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4645
4646         adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
4647
4648         adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
4649         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4650                                           AMDGPU_MAX_COMPUTE_RINGS);
4651
4652         gfx_v11_0_set_kiq_pm4_funcs(adev);
4653         gfx_v11_0_set_ring_funcs(adev);
4654         gfx_v11_0_set_irq_funcs(adev);
4655         gfx_v11_0_set_gds_init(adev);
4656         gfx_v11_0_set_rlc_funcs(adev);
4657         gfx_v11_0_set_mqd_funcs(adev);
4658         gfx_v11_0_set_imu_funcs(adev);
4659
4660         gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
4661
4662         return gfx_v11_0_init_microcode(adev);
4663 }
4664
4665 static int gfx_v11_0_late_init(void *handle)
4666 {
4667         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4668         int r;
4669
4670         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4671         if (r)
4672                 return r;
4673
4674         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4675         if (r)
4676                 return r;
4677
4678         return 0;
4679 }
4680
4681 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
4682 {
4683         uint32_t rlc_cntl;
4684
4685         /* report whether the RLC is running (RLC_ENABLE_F32 set) */
4686         rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
4687         return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
4688 }
4689
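/* Request RLC safe mode entry and poll until the RLC acknowledges the
 * command (the CMD field of RLC_SAFE_MODE clears).
 */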
4690 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4691 {
4692         uint32_t data;
4693         unsigned i;
4694
4695         data = RLC_SAFE_MODE__CMD_MASK;
4696         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4697
4698         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
4699
4700         /* wait for RLC_SAFE_MODE */
4701         for (i = 0; i < adev->usec_timeout; i++) {
4702                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
4703                                    RLC_SAFE_MODE, CMD))
4704                         break;
4705                 udelay(1);
4706         }
4707 }
4708
4709 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4710 {
4711         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
4712 }
4713
4714 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
4715                                       bool enable)
4716 {
4717         uint32_t def, data;
4718
4719         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
4720                 return;
4721
4722         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4723
4724         if (enable)
4725                 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4726         else
4727                 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4728
4729         if (def != data)
4730                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4731 }
4732
4733 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
4734                                        bool enable)
4735 {
4736         uint32_t def, data;
4737
4738         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
4739                 return;
4740
4741         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4742
4743         if (enable)
4744                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4745         else
4746                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4747
4748         if (def != data)
4749                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4750 }
4751
4752 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
4753                                            bool enable)
4754 {
4755         uint32_t def, data;
4756
4757         if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
4758                 return;
4759
4760         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4761
4762         if (enable)
4763                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4764         else
4765                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4766
4767         if (def != data)
4768                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4769 }
4770
4771 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4772                                                        bool enable)
4773 {
4774         uint32_t data, def;
4775
4776         if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
4777                 return;
4778
4779         /* It is disabled by HW by default */
4780         if (enable) {
4781                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4782                         /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4783                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4784
4785                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4786                                   RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4787                                   RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4788
4789                         if (def != data)
4790                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4791                 }
4792         } else {
4793                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4794                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4795
4796                         data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4797                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4798                                  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4799
4800                         if (def != data)
4801                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4802                 }
4803         }
4804 }
4805
4806 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4807                                                        bool enable)
4808 {
4809         uint32_t def, data;
4810
4811         if (!(adev->cg_flags &
4812               (AMD_CG_SUPPORT_GFX_CGCG |
4813               AMD_CG_SUPPORT_GFX_CGLS |
4814               AMD_CG_SUPPORT_GFX_3D_CGCG |
4815               AMD_CG_SUPPORT_GFX_3D_CGLS)))
4816                 return;
4817
4818         if (enable) {
4819                 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4820
4821                 /* unset CGCG override */
4822                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4823                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4824                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4825                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4826                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
4827                     adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4828                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4829
4830                 /* update CGCG override bits */
4831                 if (def != data)
4832                         WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4833
4834                 /* enable cgcg FSM(0x0000363F) */
4835                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4836
4837                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
4838                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
4839                         data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4840                                  RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4841                 }
4842
4843                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
4844                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
4845                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4846                                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4847                 }
4848
4849                 if (def != data)
4850                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4851
4852                 /* Program RLC_CGCG_CGLS_CTRL_3D */
4853                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4854
4855                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
4856                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
4857                         data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4858                                  RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4859                 }
4860
4861                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
4862                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
4863                         data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4864                                  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4865                 }
4866
4867                 if (def != data)
4868                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4869
4870                 /* set IDLE_POLL_COUNT(0x00900100) */
4871                 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
4872
4873                 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
4874                 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4875                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4876
4877                 if (def != data)
4878                         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
4879
4880                 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4881                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4882                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4883                 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4884                 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4885                 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
4886
4887                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4888                 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4889                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4890
4891                 /* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
4892                 if (adev->sdma.num_instances > 1) {
4893                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4894                         data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4895                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4896                 }
4897         } else {
4898                 /* Program RLC_CGCG_CGLS_CTRL */
4899                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4900
4901                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4902                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4903
4904                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4905                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4906
4907                 if (def != data)
4908                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4909
4910                 /* Program RLC_CGCG_CGLS_CTRL_3D */
4911                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4912
4913                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4914                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4915                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4916                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4917
4918                 if (def != data)
4919                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4920
4921                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4922                 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4923                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4924
4925                 /* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
4926                 if (adev->sdma.num_instances > 1) {
4927                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4928                         data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4929                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4930                 }
4931         }
4932 }
4933
4934 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4935                                             bool enable)
4936 {
4937         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4938
4939         gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
4940
4941         gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
4942
4943         gfx_v11_0_update_repeater_fgcg(adev, enable);
4944
4945         gfx_v11_0_update_sram_fgcg(adev, enable);
4946
4947         gfx_v11_0_update_perf_clk(adev, enable);
4948
4949         if (adev->cg_flags &
4950             (AMD_CG_SUPPORT_GFX_MGCG |
4951              AMD_CG_SUPPORT_GFX_CGLS |
4952              AMD_CG_SUPPORT_GFX_CGCG |
4953              AMD_CG_SUPPORT_GFX_3D_CGCG |
4954              AMD_CG_SUPPORT_GFX_3D_CGLS))
4955                 gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
4956
4957         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4958
4959         return 0;
4960 }
4961
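/* Route RLC SPM (streaming performance monitor) traffic to the given
 * VMID.  GFXOFF is disabled around the register access, and the no-KIQ
 * register path is used for the pp_one_vf SR-IOV configuration.
 */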
4962 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4963 {
4964         u32 reg, data;
4965
4966         amdgpu_gfx_off_ctrl(adev, false);
4967
4968         reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
4969         if (amdgpu_sriov_is_pp_one_vf(adev))
4970                 data = RREG32_NO_KIQ(reg);
4971         else
4972                 data = RREG32(reg);
4973
4974         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4975         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4976
4977         if (amdgpu_sriov_is_pp_one_vf(adev))
4978                 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
4979         else
4980                 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
4981
4982         amdgpu_gfx_off_ctrl(adev, true);
4983 }
4984
4985 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
4986         .is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
4987         .set_safe_mode = gfx_v11_0_set_safe_mode,
4988         .unset_safe_mode = gfx_v11_0_unset_safe_mode,
4989         .init = gfx_v11_0_rlc_init,
4990         .get_csb_size = gfx_v11_0_get_csb_size,
4991         .get_csb_buffer = gfx_v11_0_get_csb_buffer,
4992         .resume = gfx_v11_0_rlc_resume,
4993         .stop = gfx_v11_0_rlc_stop,
4994         .reset = gfx_v11_0_rlc_reset,
4995         .start = gfx_v11_0_rlc_start,
4996         .update_spm_vmid = gfx_v11_0_update_spm_vmid,
4997 };
4998
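/* Enable or disable gfx power gating in RLC_PG_CNTL and, for GC 11.0.1
 * and 11.0.4, program the CGPG hysteresis delay.
 */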
4999 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5000 {
5001         u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5002
5003         if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5004                 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5005         else
5006                 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5007
5008         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5009
5010         /* Program RLC_PG_DELAY_3 for CGPG hysteresis */
5011         if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5012                 switch (adev->ip_versions[GC_HWIP][0]) {
5013                 case IP_VERSION(11, 0, 1):
5014                 case IP_VERSION(11, 0, 4):
5015                         WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5016                         break;
5017                 default:
5018                         break;
5019                 }
5020         }
5021 }
5022
5023 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5024 {
5025         amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5026
5027         gfx_v11_cntl_power_gating(adev, enable);
5028
5029         amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5030 }
5031
5032 static int gfx_v11_0_set_powergating_state(void *handle,
5033                                            enum amd_powergating_state state)
5034 {
5035         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5036         bool enable = (state == AMD_PG_STATE_GATE);
5037
5038         if (amdgpu_sriov_vf(adev))
5039                 return 0;
5040
5041         switch (adev->ip_versions[GC_HWIP][0]) {
5042         case IP_VERSION(11, 0, 0):
5043         case IP_VERSION(11, 0, 2):
5044         case IP_VERSION(11, 0, 3):
5045                 amdgpu_gfx_off_ctrl(adev, enable);
5046                 break;
5047         case IP_VERSION(11, 0, 1):
5048         case IP_VERSION(11, 0, 4):
5049                 if (!enable)
5050                         amdgpu_gfx_off_ctrl(adev, false);
5051
5052                 gfx_v11_cntl_pg(adev, enable);
5053
5054                 if (enable)
5055                         amdgpu_gfx_off_ctrl(adev, true);
5056
5057                 break;
5058         default:
5059                 break;
5060         }
5061
5062         return 0;
5063 }
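/*
 * Editorial sketch (not part of the original file): this callback is an
 * amd_ip_funcs hook and is normally reached through the generic IP-block
 * helper rather than being called directly, roughly:
 *
 *	amdgpu_device_ip_set_powergating_state(adev,
 *						AMD_IP_BLOCK_TYPE_GFX,
 *						AMD_PG_STATE_GATE);
 *
 * On GC 11.0.1/11.0.4 that path toggles CGPG through gfx_v11_cntl_pg() and
 * pairs it with the matching GFXOFF allow/disallow; on the other GC 11.0.x
 * parts only GFXOFF is controlled.  Exact call sites vary across the driver.
 */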
5064
5065 static int gfx_v11_0_set_clockgating_state(void *handle,
5066                                           enum amd_clockgating_state state)
5067 {
5068         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5069
5070         if (amdgpu_sriov_vf(adev))
5071                 return 0;
5072
5073         switch (adev->ip_versions[GC_HWIP][0]) {
5074         case IP_VERSION(11, 0, 0):
5075         case IP_VERSION(11, 0, 1):
5076         case IP_VERSION(11, 0, 2):
5077         case IP_VERSION(11, 0, 3):
5078         case IP_VERSION(11, 0, 4):
5079                 gfx_v11_0_update_gfx_clock_gating(adev,
5080                                 state ==  AMD_CG_STATE_GATE);
5081                 break;
5082         default:
5083                 break;
5084         }
5085
5086         return 0;
5087 }
5088
5089 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
5090 {
5091         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5092         int data;
5093
5094         /* AMD_CG_SUPPORT_GFX_MGCG */
5095         data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5096         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5097                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5098
5099         /* AMD_CG_SUPPORT_REPEATER_FGCG */
5100         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5101                 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5102
5103         /* AMD_CG_SUPPORT_GFX_FGCG */
5104         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5105                 *flags |= AMD_CG_SUPPORT_GFX_FGCG;
5106
5107         /* AMD_CG_SUPPORT_GFX_PERF_CLK */
5108         if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5109                 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5110
5111         /* AMD_CG_SUPPORT_GFX_CGCG */
5112         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5113         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5114                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5115
5116         /* AMD_CG_SUPPORT_GFX_CGLS */
5117         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5118                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5119
5120         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5121         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5122         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5123                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5124
5125         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5126         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5127                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5128 }
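/*
 * Editorial sketch: the flags collected above are aggregated by the core
 * via amdgpu_device_ip_get_clockgating_state() and surfaced through the
 * power-management debugfs output, approximately:
 *
 *	u64 cg_flags = 0;
 *
 *	amdgpu_device_ip_get_clockgating_state(adev, &cg_flags);
 *	if (cg_flags & AMD_CG_SUPPORT_GFX_MGCG)
 *		;	/* MGCG is currently active */
 *
 * The caller name is quoted from memory and may differ in detail.
 */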
5129
5130 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5131 {
5132         /* gfx11 is 32bit rptr */
5133         return *(uint32_t *)ring->rptr_cpu_addr;
5134 }
5135
5136 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5137 {
5138         struct amdgpu_device *adev = ring->adev;
5139         u64 wptr;
5140
5141         /* XXX check if swapping is necessary on BE */
5142         if (ring->use_doorbell) {
5143                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5144         } else {
5145                 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5146                 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5147         }
5148
5149         return wptr;
5150 }
5151
5152 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5153 {
5154         struct amdgpu_device *adev = ring->adev;
5155         uint32_t *wptr_saved;
5156         uint32_t *is_queue_unmap;
5157         uint64_t aggregated_db_index;
5158         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
5159         uint64_t wptr_tmp;
5160
5161         if (ring->is_mes_queue) {
5162                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5163                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5164                                               sizeof(uint32_t));
5165                 aggregated_db_index =
5166                         amdgpu_mes_get_aggregated_doorbell_index(adev,
5167                                                                  ring->hw_prio);
5168
5169                 wptr_tmp = ring->wptr & ring->buf_mask;
5170                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5171                 *wptr_saved = wptr_tmp;
5172                 /* assume the doorbell is always used by an MES-mapped queue */
5173                 if (*is_queue_unmap) {
5174                         WDOORBELL64(aggregated_db_index, wptr_tmp);
5175                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5176                 } else {
5177                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5178
5179                         if (*is_queue_unmap)
5180                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
5181                 }
5182         } else {
5183                 if (ring->use_doorbell) {
5184                         /* XXX check if swapping is necessary on BE */
5185                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5186                                      ring->wptr);
5187                         WDOORBELL64(ring->doorbell_index, ring->wptr);
5188                 } else {
5189                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5190                                      lower_32_bits(ring->wptr));
5191                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5192                                      upper_32_bits(ring->wptr));
5193                 }
5194         }
5195 }
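/*
 * Editorial note on the MES path above (it applies equally to the compute
 * variant below): for MES-managed queues the MQD allocation is assumed to
 * be followed by two extra dwords shared between the CPU and MES:
 *
 *	offset mqd_size + 0:  wptr_saved      - last wptr written by the CPU
 *	offset mqd_size + 4:  is_queue_unmap  - set by MES while the queue is
 *	                                        currently unmapped
 *
 * When is_queue_unmap is set, the wptr is also rung on the aggregated
 * doorbell so MES remaps and schedules the queue; otherwise only the
 * queue's own doorbell is written.
 */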
5196
5197 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5198 {
5199         /* gfx11 hardware is 32bit rptr */
5200         return *(uint32_t *)ring->rptr_cpu_addr;
5201 }
5202
5203 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5204 {
5205         u64 wptr;
5206
5207         /* XXX check if swapping is necessary on BE */
5208         if (ring->use_doorbell)
5209                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5210         else
5211                 BUG();
5212         return wptr;
5213 }
5214
5215 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5216 {
5217         struct amdgpu_device *adev = ring->adev;
5218         uint32_t *wptr_saved;
5219         uint32_t *is_queue_unmap;
5220         uint64_t aggregated_db_index;
5221         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
5222         uint64_t wptr_tmp;
5223
5224         if (ring->is_mes_queue) {
5225                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5226                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5227                                               sizeof(uint32_t));
5228                 aggregated_db_index =
5229                         amdgpu_mes_get_aggregated_doorbell_index(adev,
5230                                                                  ring->hw_prio);
5231
5232                 wptr_tmp = ring->wptr & ring->buf_mask;
5233                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5234                 *wptr_saved = wptr_tmp;
5235                 /* assume the doorbell is always used by an MES-mapped queue */
5236                 if (*is_queue_unmap) {
5237                         WDOORBELL64(aggregated_db_index, wptr_tmp);
5238                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5239                 } else {
5240                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5241
5242                         if (*is_queue_unmap)
5243                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
5244                 }
5245         } else {
5246                 /* XXX check if swapping is necessary on BE */
5247                 if (ring->use_doorbell) {
5248                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5249                                      ring->wptr);
5250                         WDOORBELL64(ring->doorbell_index, ring->wptr);
5251                 } else {
5252                         BUG(); /* only DOORBELL method supported on gfx11 now */
5253                 }
5254         }
5255 }
5256
5257 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5258 {
5259         struct amdgpu_device *adev = ring->adev;
5260         u32 ref_and_mask, reg_mem_engine;
5261         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5262
5263         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5264                 switch (ring->me) {
5265                 case 1:
5266                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5267                         break;
5268                 case 2:
5269                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5270                         break;
5271                 default:
5272                         return;
5273                 }
5274                 reg_mem_engine = 0;
5275         } else {
5276                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5277                 reg_mem_engine = 1; /* pfp */
5278         }
5279
5280         gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5281                                adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5282                                adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5283                                ref_and_mask, ref_and_mask, 0x20);
5284 }
5285
5286 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5287                                        struct amdgpu_job *job,
5288                                        struct amdgpu_ib *ib,
5289                                        uint32_t flags)
5290 {
5291         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5292         u32 header, control = 0;
5293
5294         BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
5295
5296         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5297
5298         control |= ib->length_dw | (vmid << 24);
5299
5300         if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5301                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5302
5303                 if (flags & AMDGPU_IB_PREEMPTED)
5304                         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5305
5306                 if (vmid)
5307                         gfx_v11_0_ring_emit_de_meta(ring,
5308                                     !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
5309         }
5310
5311         if (ring->is_mes_queue)
5312                 /* inherit vmid from mqd */
5313                 control |= 0x400000;
5314
5315         amdgpu_ring_write(ring, header);
5316         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5317         amdgpu_ring_write(ring,
5318 #ifdef __BIG_ENDIAN
5319                 (2 << 0) |
5320 #endif
5321                 lower_32_bits(ib->gpu_addr));
5322         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5323         amdgpu_ring_write(ring, control);
5324 }
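/*
 * Editorial sketch of the stream built above (4 dwords, matching
 * .emit_ib_size = 4 in gfx_v11_0_ring_funcs_gfx):
 *
 *	dw0: PACKET3(PACKET3_INDIRECT_BUFFER, 2)
 *	dw1: lower_32_bits(ib->gpu_addr)   (| 2 on big-endian builds)
 *	dw2: upper_32_bits(ib->gpu_addr)
 *	dw3: ib->length_dw | (vmid << 24) | MCBP/MES control bits
 *
 * The DE meta-data write is emitted beforehand only when mid-command-buffer
 * preemption is enabled for the IB.
 */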
5325
5326 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5327                                            struct amdgpu_job *job,
5328                                            struct amdgpu_ib *ib,
5329                                            uint32_t flags)
5330 {
5331         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5332         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5333
5334         if (ring->is_mes_queue)
5335                 /* inherit vmid from mqd */
5336                 control |= 0x40000000;
5337
5338         /* Currently, there is a high possibility to get wave ID mismatch
5339          * between ME and GDS, leading to a hw deadlock, because ME generates
5340          * different wave IDs than the GDS expects. This situation happens
5341          * randomly when at least 5 compute pipes use GDS ordered append.
5342          * The wave IDs generated by ME are also wrong after suspend/resume.
5343          * Those are probably bugs somewhere else in the kernel driver.
5344          *
5345          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5346          * GDS to 0 for this ring (me/pipe).
5347          */
5348         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5349                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5350                 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5351                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5352         }
5353
5354         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5355         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5356         amdgpu_ring_write(ring,
5357 #ifdef __BIG_ENDIAN
5358                                 (2 << 0) |
5359 #endif
5360                                 lower_32_bits(ib->gpu_addr));
5361         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5362         amdgpu_ring_write(ring, control);
5363 }
5364
5365 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5366                                      u64 seq, unsigned flags)
5367 {
5368         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5369         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5370
5371         /* RELEASE_MEM - flush caches, send int */
5372         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5373         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5374                                  PACKET3_RELEASE_MEM_GCR_GL2_WB |
5375                                  PACKET3_RELEASE_MEM_GCR_GL2_INV |
5376                                  PACKET3_RELEASE_MEM_GCR_GL2_US |
5377                                  PACKET3_RELEASE_MEM_GCR_GL1_INV |
5378                                  PACKET3_RELEASE_MEM_GCR_GLV_INV |
5379                                  PACKET3_RELEASE_MEM_GCR_GLM_INV |
5380                                  PACKET3_RELEASE_MEM_GCR_GLM_WB |
5381                                  PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5382                                  PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5383                                  PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
5384         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5385                                  PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5386
5387         /*
5388          * the address must be Qword aligned for a 64bit write, and Dword
5389          * aligned if only the low 32 bits of data are sent (data high is discarded)
5390          */
5391         if (write64bit)
5392                 BUG_ON(addr & 0x7);
5393         else
5394                 BUG_ON(addr & 0x3);
5395         amdgpu_ring_write(ring, lower_32_bits(addr));
5396         amdgpu_ring_write(ring, upper_32_bits(addr));
5397         amdgpu_ring_write(ring, lower_32_bits(seq));
5398         amdgpu_ring_write(ring, upper_32_bits(seq));
5399         amdgpu_ring_write(ring, ring->is_mes_queue ?
5400                          (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
5401 }
5402
5403 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5404 {
5405         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5406         uint32_t seq = ring->fence_drv.sync_seq;
5407         uint64_t addr = ring->fence_drv.gpu_addr;
5408
5409         gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5410                                upper_32_bits(addr), seq, 0xffffffff, 4);
5411 }
5412
5413 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5414                                    uint16_t pasid, uint32_t flush_type,
5415                                    bool all_hub, uint8_t dst_sel)
5416 {
5417         amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5418         amdgpu_ring_write(ring,
5419                           PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5420                           PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5421                           PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5422                           PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5423 }
5424
5425 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5426                                          unsigned vmid, uint64_t pd_addr)
5427 {
5428         if (ring->is_mes_queue)
5429                 gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5430         else
5431                 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5432
5433         /* compute doesn't have PFP */
5434         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5435                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5436                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5437                 amdgpu_ring_write(ring, 0x0);
5438         }
5439 }
5440
5441 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5442                                           u64 seq, unsigned int flags)
5443 {
5444         struct amdgpu_device *adev = ring->adev;
5445
5446         /* we only allocate 32bit for each seq wb address */
5447         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5448
5449         /* write fence seq to the "addr" */
5450         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5451         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5452                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5453         amdgpu_ring_write(ring, lower_32_bits(addr));
5454         amdgpu_ring_write(ring, upper_32_bits(addr));
5455         amdgpu_ring_write(ring, lower_32_bits(seq));
5456
5457         if (flags & AMDGPU_FENCE_FLAG_INT) {
5458                 /* set register to trigger INT */
5459                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5460                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5461                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5462                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
5463                 amdgpu_ring_write(ring, 0);
5464                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5465         }
5466 }
5467
5468 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
5469                                          uint32_t flags)
5470 {
5471         uint32_t dw2 = 0;
5472
5473         dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
5474         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5475                 /* set load_global_config & load_global_uconfig */
5476                 dw2 |= 0x8001;
5477                 /* set load_cs_sh_regs */
5478                 dw2 |= 0x01000000;
5479                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5480                 dw2 |= 0x10002;
5481         }
5482
5483         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5484         amdgpu_ring_write(ring, dw2);
5485         amdgpu_ring_write(ring, 0);
5486 }
5487
5488 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
5489                                            u64 shadow_va, u64 csa_va,
5490                                            u64 gds_va, bool init_shadow,
5491                                            int vmid)
5492 {
5493         struct amdgpu_device *adev = ring->adev;
5494
5495         if (!adev->gfx.cp_gfx_shadow)
5496                 return;
5497
5498         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
5499         amdgpu_ring_write(ring, lower_32_bits(shadow_va));
5500         amdgpu_ring_write(ring, upper_32_bits(shadow_va));
5501         amdgpu_ring_write(ring, lower_32_bits(gds_va));
5502         amdgpu_ring_write(ring, upper_32_bits(gds_va));
5503         amdgpu_ring_write(ring, lower_32_bits(csa_va));
5504         amdgpu_ring_write(ring, upper_32_bits(csa_va));
5505         amdgpu_ring_write(ring, shadow_va ?
5506                           PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
5507         amdgpu_ring_write(ring, init_shadow ?
5508                           PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
5509 }
5510
5511 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5512 {
5513         unsigned ret;
5514
5515         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5516         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5517         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5518         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
5519         ret = ring->wptr & ring->buf_mask;
5520         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5521
5522         return ret;
5523 }
5524
5525 static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5526 {
5527         unsigned cur;
5528         BUG_ON(offset > ring->buf_mask);
5529         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5530
5531         cur = (ring->wptr - 1) & ring->buf_mask;
5532         if (likely(cur > offset))
5533                 ring->ring[offset] = cur - offset;
5534         else
5535                 ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
5536 }
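/*
 * Editorial usage sketch: the two helpers above form a pair used by the
 * common ring code around preemptible packets.  The first emits COND_EXEC
 * with a placeholder count and returns its ring offset; the second patches
 * in how many dwords to skip when *cond_exe_gpu_addr reads back as 0:
 *
 *	unsigned int pos = gfx_v11_0_ring_emit_init_cond_exec(ring);
 *
 *	// ... emit the packets that may be dropped on preemption ...
 *	gfx_v11_0_ring_emit_patch_cond_exec(ring, pos);
 *
 * The patched value is the dword distance from the placeholder to the
 * current wptr, wrapped around the ring buffer when necessary.
 */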
5537
5538 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
5539 {
5540         int i, r = 0;
5541         struct amdgpu_device *adev = ring->adev;
5542         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5543         struct amdgpu_ring *kiq_ring = &kiq->ring;
5544         unsigned long flags;
5545
5546         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5547                 return -EINVAL;
5548
5549         spin_lock_irqsave(&kiq->ring_lock, flags);
5550
5551         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5552                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5553                 return -ENOMEM;
5554         }
5555
5556         /* assert preemption condition */
5557         amdgpu_ring_set_preempt_cond_exec(ring, false);
5558
5559         /* assert IB preemption, emit the trailing fence */
5560         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5561                                    ring->trail_fence_gpu_addr,
5562                                    ++ring->trail_seq);
5563         amdgpu_ring_commit(kiq_ring);
5564
5565         spin_unlock_irqrestore(&kiq->ring_lock, flags);
5566
5567         /* poll the trailing fence */
5568         for (i = 0; i < adev->usec_timeout; i++) {
5569                 if (ring->trail_seq ==
5570                     le32_to_cpu(*(ring->trail_fence_cpu_addr)))
5571                         break;
5572                 udelay(1);
5573         }
5574
5575         if (i >= adev->usec_timeout) {
5576                 r = -EINVAL;
5577                 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
5578         }
5579
5580         /* deassert preemption condition */
5581         amdgpu_ring_set_preempt_cond_exec(ring, true);
5582         return r;
5583 }
5584
5585 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
5586 {
5587         struct amdgpu_device *adev = ring->adev;
5588         struct v10_de_ib_state de_payload = {0};
5589         uint64_t offset, gds_addr, de_payload_gpu_addr;
5590         void *de_payload_cpu_addr;
5591         int cnt;
5592
5593         if (ring->is_mes_queue) {
5594                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5595                                   gfx[0].gfx_meta_data) +
5596                         offsetof(struct v10_gfx_meta_data, de_payload);
5597                 de_payload_gpu_addr =
5598                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5599                 de_payload_cpu_addr =
5600                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5601
5602                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5603                                   gfx[0].gds_backup) +
5604                         offsetof(struct v10_gfx_meta_data, de_payload);
5605                 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5606         } else {
5607                 offset = offsetof(struct v10_gfx_meta_data, de_payload);
5608                 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5609                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5610
5611                 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5612                                  AMDGPU_CSA_SIZE - adev->gds.gds_size,
5613                                  PAGE_SIZE);
5614         }
5615
5616         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5617         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5618
5619         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5620         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5621         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5622                                  WRITE_DATA_DST_SEL(8) |
5623                                  WR_CONFIRM) |
5624                                  WRITE_DATA_CACHE_POLICY(0));
5625         amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5626         amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5627
5628         if (resume)
5629                 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5630                                            sizeof(de_payload) >> 2);
5631         else
5632                 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5633                                            sizeof(de_payload) >> 2);
5634 }
5635
5636 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5637                                     bool secure)
5638 {
5639         uint32_t v = secure ? FRAME_TMZ : 0;
5640
5641         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5642         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5643 }
5644
5645 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5646                                      uint32_t reg_val_offs)
5647 {
5648         struct amdgpu_device *adev = ring->adev;
5649
5650         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5651         amdgpu_ring_write(ring, 0 |     /* src: register */
5652                                 (5 << 8) |      /* dst: memory */
5653                                 (1 << 20));     /* write confirm */
5654         amdgpu_ring_write(ring, reg);
5655         amdgpu_ring_write(ring, 0);
5656         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5657                                 reg_val_offs * 4));
5658         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5659                                 reg_val_offs * 4));
5660 }
5661
5662 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5663                                    uint32_t val)
5664 {
5665         uint32_t cmd = 0;
5666
5667         switch (ring->funcs->type) {
5668         case AMDGPU_RING_TYPE_GFX:
5669                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5670                 break;
5671         case AMDGPU_RING_TYPE_KIQ:
5672                 cmd = (1 << 16); /* no inc addr */
5673                 break;
5674         default:
5675                 cmd = WR_CONFIRM;
5676                 break;
5677         }
5678         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5679         amdgpu_ring_write(ring, cmd);
5680         amdgpu_ring_write(ring, reg);
5681         amdgpu_ring_write(ring, 0);
5682         amdgpu_ring_write(ring, val);
5683 }
5684
5685 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5686                                         uint32_t val, uint32_t mask)
5687 {
5688         gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5689 }
5690
5691 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5692                                                    uint32_t reg0, uint32_t reg1,
5693                                                    uint32_t ref, uint32_t mask)
5694 {
5695         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5696
5697         gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5698                                ref, mask, 0x20);
5699 }
5700
5701 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
5702                                          unsigned vmid)
5703 {
5704         struct amdgpu_device *adev = ring->adev;
5705         uint32_t value = 0;
5706
5707         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5708         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5709         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5710         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5711         WREG32_SOC15(GC, 0, regSQ_CMD, value);
5712 }
5713
5714 static void
5715 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5716                                       uint32_t me, uint32_t pipe,
5717                                       enum amdgpu_interrupt_state state)
5718 {
5719         uint32_t cp_int_cntl, cp_int_cntl_reg;
5720
5721         if (!me) {
5722                 switch (pipe) {
5723                 case 0:
5724                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
5725                         break;
5726                 case 1:
5727                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
5728                         break;
5729                 default:
5730                         DRM_DEBUG("invalid pipe %d\n", pipe);
5731                         return;
5732                 }
5733         } else {
5734                 DRM_DEBUG("invalid me %d\n", me);
5735                 return;
5736         }
5737
5738         switch (state) {
5739         case AMDGPU_IRQ_STATE_DISABLE:
5740                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5741                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5742                                             TIME_STAMP_INT_ENABLE, 0);
5743                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5744                                             GENERIC0_INT_ENABLE, 0);
5745                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5746                 break;
5747         case AMDGPU_IRQ_STATE_ENABLE:
5748                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5749                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5750                                             TIME_STAMP_INT_ENABLE, 1);
5751                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5752                                             GENERIC0_INT_ENABLE, 1);
5753                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5754                 break;
5755         default:
5756                 break;
5757         }
5758 }
5759
5760 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5761                                                      int me, int pipe,
5762                                                      enum amdgpu_interrupt_state state)
5763 {
5764         u32 mec_int_cntl, mec_int_cntl_reg;
5765
5766         /*
5767          * amdgpu controls only the first MEC. That's why this function only
5768          * handles the setting of interrupts for this specific MEC. All other
5769          * pipes' interrupts are set by amdkfd.
5770          */
5771
5772         if (me == 1) {
5773                 switch (pipe) {
5774                 case 0:
5775                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
5776                         break;
5777                 case 1:
5778                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
5779                         break;
5780                 case 2:
5781                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
5782                         break;
5783                 case 3:
5784                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
5785                         break;
5786                 default:
5787                         DRM_DEBUG("invalid pipe %d\n", pipe);
5788                         return;
5789                 }
5790         } else {
5791                 DRM_DEBUG("invalid me %d\n", me);
5792                 return;
5793         }
5794
5795         switch (state) {
5796         case AMDGPU_IRQ_STATE_DISABLE:
5797                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5798                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5799                                              TIME_STAMP_INT_ENABLE, 0);
5800                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5801                                              GENERIC0_INT_ENABLE, 0);
5802                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5803                 break;
5804         case AMDGPU_IRQ_STATE_ENABLE:
5805                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5806                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5807                                              TIME_STAMP_INT_ENABLE, 1);
5808                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5809                                              GENERIC0_INT_ENABLE, 1);
5810                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5811                 break;
5812         default:
5813                 break;
5814         }
5815 }
5816
5817 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5818                                             struct amdgpu_irq_src *src,
5819                                             unsigned type,
5820                                             enum amdgpu_interrupt_state state)
5821 {
5822         switch (type) {
5823         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5824                 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
5825                 break;
5826         case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
5827                 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
5828                 break;
5829         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5830                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5831                 break;
5832         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5833                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5834                 break;
5835         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5836                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5837                 break;
5838         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5839                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5840                 break;
5841         default:
5842                 break;
5843         }
5844         return 0;
5845 }
5846
5847 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
5848                              struct amdgpu_irq_src *source,
5849                              struct amdgpu_iv_entry *entry)
5850 {
5851         int i;
5852         u8 me_id, pipe_id, queue_id;
5853         struct amdgpu_ring *ring;
5854         uint32_t mes_queue_id = entry->src_data[0];
5855
5856         DRM_DEBUG("IH: CP EOP\n");
5857
5858         if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
5859                 struct amdgpu_mes_queue *queue;
5860
5861                 mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
5862
5863                 spin_lock(&adev->mes.queue_id_lock);
5864                 queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
5865                 if (queue) {
5866                         DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
5867                         amdgpu_fence_process(queue->ring);
5868                 }
5869                 spin_unlock(&adev->mes.queue_id_lock);
5870         } else {
5871                 me_id = (entry->ring_id & 0x0c) >> 2;
5872                 pipe_id = (entry->ring_id & 0x03) >> 0;
5873                 queue_id = (entry->ring_id & 0x70) >> 4;
5874
5875                 switch (me_id) {
5876                 case 0:
5877                         if (pipe_id == 0)
5878                                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5879                         else
5880                                 amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
5881                         break;
5882                 case 1:
5883                 case 2:
5884                         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5885                                 ring = &adev->gfx.compute_ring[i];
5886                                 /* Per-queue interrupt is supported for MEC starting from VI.
5887                                  * The interrupt can only be enabled/disabled per pipe instead
5888                                  * of per queue.
5889                                  */
5890                                 if ((ring->me == me_id) &&
5891                                     (ring->pipe == pipe_id) &&
5892                                     (ring->queue == queue_id))
5893                                         amdgpu_fence_process(ring);
5894                         }
5895                         break;
5896                 }
5897         }
5898
5899         return 0;
5900 }
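/*
 * Editorial note on the ring_id decoding above (layout inferred from the
 * masks used, not from a register specification):
 *
 *	queue_id = ring_id bits [6:4]
 *	me_id    = ring_id bits [3:2]
 *	pipe_id  = ring_id bits [1:0]
 *
 * For example, ring_id 0x25 decodes to queue 2 on ME 1 / pipe 1 and is
 * routed to the matching entry of adev->gfx.compute_ring[].
 */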
5901
5902 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5903                                               struct amdgpu_irq_src *source,
5904                                               unsigned type,
5905                                               enum amdgpu_interrupt_state state)
5906 {
5907         switch (state) {
5908         case AMDGPU_IRQ_STATE_DISABLE:
5909         case AMDGPU_IRQ_STATE_ENABLE:
5910                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5911                                PRIV_REG_INT_ENABLE,
5912                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5913                 break;
5914         default:
5915                 break;
5916         }
5917
5918         return 0;
5919 }
5920
5921 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5922                                                struct amdgpu_irq_src *source,
5923                                                unsigned type,
5924                                                enum amdgpu_interrupt_state state)
5925 {
5926         switch (state) {
5927         case AMDGPU_IRQ_STATE_DISABLE:
5928         case AMDGPU_IRQ_STATE_ENABLE:
5929                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5930                                PRIV_INSTR_INT_ENABLE,
5931                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5932                 break;
5933         default:
5934                 break;
5935         }
5936
5937         return 0;
5938 }
5939
5940 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
5941                                         struct amdgpu_iv_entry *entry)
5942 {
5943         u8 me_id, pipe_id, queue_id;
5944         struct amdgpu_ring *ring;
5945         int i;
5946
5947         me_id = (entry->ring_id & 0x0c) >> 2;
5948         pipe_id = (entry->ring_id & 0x03) >> 0;
5949         queue_id = (entry->ring_id & 0x70) >> 4;
5950
5951         switch (me_id) {
5952         case 0:
5953                 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
5954                         ring = &adev->gfx.gfx_ring[i];
5955                         /* we only enable 1 gfx queue per pipe for now */
5956                         if (ring->me == me_id && ring->pipe == pipe_id)
5957                                 drm_sched_fault(&ring->sched);
5958                 }
5959                 break;
5960         case 1:
5961         case 2:
5962                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5963                         ring = &adev->gfx.compute_ring[i];
5964                         if (ring->me == me_id && ring->pipe == pipe_id &&
5965                             ring->queue == queue_id)
5966                                 drm_sched_fault(&ring->sched);
5967                 }
5968                 break;
5969         default:
5970                 BUG();
5971                 break;
5972         }
5973 }
5974
5975 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
5976                                   struct amdgpu_irq_src *source,
5977                                   struct amdgpu_iv_entry *entry)
5978 {
5979         DRM_ERROR("Illegal register access in command stream\n");
5980         gfx_v11_0_handle_priv_fault(adev, entry);
5981         return 0;
5982 }
5983
5984 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
5985                                    struct amdgpu_irq_src *source,
5986                                    struct amdgpu_iv_entry *entry)
5987 {
5988         DRM_ERROR("Illegal instruction in command stream\n");
5989         gfx_v11_0_handle_priv_fault(adev, entry);
5990         return 0;
5991 }
5992
5993 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
5994                                   struct amdgpu_irq_src *source,
5995                                   struct amdgpu_iv_entry *entry)
5996 {
5997         if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq)
5998                 return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);
5999
6000         return 0;
6001 }
6002
6003 #if 0
6004 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6005                                              struct amdgpu_irq_src *src,
6006                                              unsigned int type,
6007                                              enum amdgpu_interrupt_state state)
6008 {
6009         uint32_t tmp, target;
6010         struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
6011
6012         target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6013         target += ring->pipe;
6014
6015         switch (type) {
6016         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6017                 if (state == AMDGPU_IRQ_STATE_DISABLE) {
6018                         tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6019                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6020                                             GENERIC2_INT_ENABLE, 0);
6021                         WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6022
6023                         tmp = RREG32_SOC15_IP(GC, target);
6024                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6025                                             GENERIC2_INT_ENABLE, 0);
6026                         WREG32_SOC15_IP(GC, target, tmp);
6027                 } else {
6028                         tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6029                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6030                                             GENERIC2_INT_ENABLE, 1);
6031                         WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6032
6033                         tmp = RREG32_SOC15_IP(GC, target);
6034                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6035                                             GENERIC2_INT_ENABLE, 1);
6036                         WREG32_SOC15_IP(GC, target, tmp);
6037                 }
6038                 break;
6039         default:
6040                 BUG(); /* kiq only support GENERIC2_INT now */
6041                 break;
6042         }
6043         return 0;
6044 }
6045 #endif
6046
6047 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6048 {
6049         const unsigned int gcr_cntl =
6050                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6051                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6052                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6053                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6054                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6055                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6056                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6057                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6058
6059         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6060         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6061         amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6062         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6063         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6064         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6065         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6066         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6067         amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6068 }
6069
6070 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
6071         .name = "gfx_v11_0",
6072         .early_init = gfx_v11_0_early_init,
6073         .late_init = gfx_v11_0_late_init,
6074         .sw_init = gfx_v11_0_sw_init,
6075         .sw_fini = gfx_v11_0_sw_fini,
6076         .hw_init = gfx_v11_0_hw_init,
6077         .hw_fini = gfx_v11_0_hw_fini,
6078         .suspend = gfx_v11_0_suspend,
6079         .resume = gfx_v11_0_resume,
6080         .is_idle = gfx_v11_0_is_idle,
6081         .wait_for_idle = gfx_v11_0_wait_for_idle,
6082         .soft_reset = gfx_v11_0_soft_reset,
6083         .check_soft_reset = gfx_v11_0_check_soft_reset,
6084         .post_soft_reset = gfx_v11_0_post_soft_reset,
6085         .set_clockgating_state = gfx_v11_0_set_clockgating_state,
6086         .set_powergating_state = gfx_v11_0_set_powergating_state,
6087         .get_clockgating_state = gfx_v11_0_get_clockgating_state,
6088 };
6089
6090 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
6091         .type = AMDGPU_RING_TYPE_GFX,
6092         .align_mask = 0xff,
6093         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6094         .support_64bit_ptrs = true,
6095         .secure_submission_supported = true,
6096         .get_rptr = gfx_v11_0_ring_get_rptr_gfx,
6097         .get_wptr = gfx_v11_0_ring_get_wptr_gfx,
6098         .set_wptr = gfx_v11_0_ring_set_wptr_gfx,
6099         .emit_frame_size = /* 242 dwords maximum in total, assuming 16 IBs */
6100                 5 + /* COND_EXEC */
6101                 9 + /* SET_Q_PREEMPTION_MODE */
6102                 7 + /* PIPELINE_SYNC */
6103                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6104                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6105                 2 + /* VM_FLUSH */
6106                 8 + /* FENCE for VM_FLUSH */
6107                 20 + /* GDS switch */
6108                 5 + /* COND_EXEC */
6109                 7 + /* HDP_flush */
6110                 4 + /* VGT_flush */
6111                 31 + /* DE_META */
6112                 3 + /* CNTX_CTRL */
6113                 5 + /* HDP_INVL */
6114                 8 + 8 + /* FENCE x2 */
6115                 8, /* gfx_v11_0_emit_mem_sync */
6116         .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
6117         .emit_ib = gfx_v11_0_ring_emit_ib_gfx,
6118         .emit_fence = gfx_v11_0_ring_emit_fence,
6119         .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6120         .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6121         .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6122         .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6123         .test_ring = gfx_v11_0_ring_test_ring,
6124         .test_ib = gfx_v11_0_ring_test_ib,
6125         .insert_nop = amdgpu_ring_insert_nop,
6126         .pad_ib = amdgpu_ring_generic_pad_ib,
6127         .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
6128         .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
6129         .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
6130         .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
6131         .preempt_ib = gfx_v11_0_ring_preempt_ib,
6132         .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
6133         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6134         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6135         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6136         .soft_recovery = gfx_v11_0_ring_soft_recovery,
6137         .emit_mem_sync = gfx_v11_0_emit_mem_sync,
6138 };
6139
6140 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
6141         .type = AMDGPU_RING_TYPE_COMPUTE,
6142         .align_mask = 0xff,
6143         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6144         .support_64bit_ptrs = true,
6145         .get_rptr = gfx_v11_0_ring_get_rptr_compute,
6146         .get_wptr = gfx_v11_0_ring_get_wptr_compute,
6147         .set_wptr = gfx_v11_0_ring_set_wptr_compute,
6148         .emit_frame_size =
6149                 20 + /* gfx_v11_0_ring_emit_gds_switch */
6150                 7 + /* gfx_v11_0_ring_emit_hdp_flush */
6151                 5 + /* hdp invalidate */
6152                 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6153                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6154                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6155                 2 + /* gfx_v11_0_ring_emit_vm_flush */
6156                 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
6157                 8, /* gfx_v11_0_emit_mem_sync */
6158         .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
6159         .emit_ib = gfx_v11_0_ring_emit_ib_compute,
6160         .emit_fence = gfx_v11_0_ring_emit_fence,
6161         .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6162         .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6163         .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6164         .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6165         .test_ring = gfx_v11_0_ring_test_ring,
6166         .test_ib = gfx_v11_0_ring_test_ib,
6167         .insert_nop = amdgpu_ring_insert_nop,
6168         .pad_ib = amdgpu_ring_generic_pad_ib,
6169         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6170         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6171         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6172         .emit_mem_sync = gfx_v11_0_emit_mem_sync,
6173 };
6174
6175 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
6176         .type = AMDGPU_RING_TYPE_KIQ,
6177         .align_mask = 0xff,
6178         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6179         .support_64bit_ptrs = true,
6180         .get_rptr = gfx_v11_0_ring_get_rptr_compute,
6181         .get_wptr = gfx_v11_0_ring_get_wptr_compute,
6182         .set_wptr = gfx_v11_0_ring_set_wptr_compute,
6183         .emit_frame_size =
6184                 20 + /* gfx_v11_0_ring_emit_gds_switch */
6185                 7 + /* gfx_v11_0_ring_emit_hdp_flush */
6186                 5 + /* hdp invalidate */
6187                 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6188                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6189                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6190                 2 + /* gfx_v11_0_ring_emit_vm_flush */
6191                 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6192         .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
6193         .emit_ib = gfx_v11_0_ring_emit_ib_compute,
6194         .emit_fence = gfx_v11_0_ring_emit_fence_kiq,
6195         .test_ring = gfx_v11_0_ring_test_ring,
6196         .test_ib = gfx_v11_0_ring_test_ib,
6197         .insert_nop = amdgpu_ring_insert_nop,
6198         .pad_ib = amdgpu_ring_generic_pad_ib,
6199         .emit_rreg = gfx_v11_0_ring_emit_rreg,
6200         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6201         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6202         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6203 };
6204
6205 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
6206 {
6207         int i;
6208
6209         adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;
6210
6211         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6212                 adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
6213
6214         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6215                 adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
6216 }
6217
6218 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
6219         .set = gfx_v11_0_set_eop_interrupt_state,
6220         .process = gfx_v11_0_eop_irq,
6221 };
6222
6223 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
6224         .set = gfx_v11_0_set_priv_reg_fault_state,
6225         .process = gfx_v11_0_priv_reg_irq,
6226 };
6227
6228 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
6229         .set = gfx_v11_0_set_priv_inst_fault_state,
6230         .process = gfx_v11_0_priv_inst_irq,
6231 };
6232
6233 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
6234         .process = gfx_v11_0_rlc_gc_fed_irq,
6235 };
6236
6237 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
6238 {
6239         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6240         adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
6241
6242         adev->gfx.priv_reg_irq.num_types = 1;
6243         adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
6244
6245         adev->gfx.priv_inst_irq.num_types = 1;
6246         adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
6247
6248         adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
6249         adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
6250
6251 }
6252
6253 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
6254 {
6255         if (adev->flags & AMD_IS_APU)
6256                 adev->gfx.imu.mode = MISSION_MODE;
6257         else
6258                 adev->gfx.imu.mode = DEBUG_MODE;
6259
6260         adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
6261 }
6262
6263 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
6264 {
6265         adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
6266 }
6267
6268 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
6269 {
6270         unsigned total_cu = adev->gfx.config.max_cu_per_sh *
6271                             adev->gfx.config.max_sh_per_se *
6272                             adev->gfx.config.max_shader_engines;
6273
6274         adev->gds.gds_size = 0x1000;
6275         adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
6276         adev->gds.gws_size = 64;
6277         adev->gds.oa_size = 16;
6278 }
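/*
 * Editorial worked example for the sizing above (the CU count is
 * hypothetical): a part with 48 CUs in total gets
 *
 *	gds_compute_max_wave_id = 48 * 32 - 1 = 1535
 *
 * i.e. up to 32 wave IDs per CU, alongside a fixed 4 KiB GDS, 64 GWS
 * entries and 16 OA entries.
 */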
6279
6280 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
6281 {
6282         /* set gfx eng mqd */
6283         adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
6284                 sizeof(struct v11_gfx_mqd);
6285         adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
6286                 gfx_v11_0_gfx_mqd_init;
6287         /* set compute eng mqd */
6288         adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
6289                 sizeof(struct v11_compute_mqd);
6290         adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
6291                 gfx_v11_0_compute_mqd_init;
6292 }
6293
6294 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
6295                                                           u32 bitmap)
6296 {
6297         u32 data;
6298
6299         if (!bitmap)
6300                 return;
6301
6302         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6303         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6304
6305         WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
6306 }
6307
6308 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
6309 {
6310         u32 data, wgp_bitmask;
6311         data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
6312         data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
6313
6314         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6315         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6316
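        /* each WGP holds two CUs, so there are max_cu_per_sh / 2 WGPs per SH */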
6317         wgp_bitmask =
6318                 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
6319
6320         return (~data) & wgp_bitmask;
6321 }
6322
6323 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
6324 {
6325         u32 wgp_idx, wgp_active_bitmap;
6326         u32 cu_bitmap_per_wgp, cu_active_bitmap;
6327
6328         wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
6329         cu_active_bitmap = 0;
6330
6331         for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
6332                 /* one enabled WGP means two enabled CUs */
6333                 cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
6334                 if (wgp_active_bitmap & (1 << wgp_idx))
6335                         cu_active_bitmap |= cu_bitmap_per_wgp;
6336         }
6337
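        /* e.g. (illustration only) wgp_active_bitmap = 0x5 expands to
         * cu_active_bitmap = 0x33: WGPs 0 and 2 map to CU pairs {0,1} and {4,5} */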
6338         return cu_active_bitmap;
6339 }
6340
6341 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
6342                                  struct amdgpu_cu_info *cu_info)
6343 {
6344         int i, j, k, counter, active_cu_number = 0;
6345         u32 mask, bitmap;
6346         unsigned disable_masks[8 * 2];
6347
6348         if (!adev || !cu_info)
6349                 return -EINVAL;
6350
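        /* user-supplied CU disable masks, one per SH, sized for up to 8 SEs x 2 SHs */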
6351         amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
6352
6353         mutex_lock(&adev->grbm_idx_mutex);
6354         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6355                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6356                         mask = 1;
6357                         counter = 0;
6358                         gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
6359                         if (i < 8 && j < 2)
6360                                 gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
6361                                         adev, disable_masks[i * 2 + j]);
6362                         bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
6363
6364                         /*
6365                          * GFX11 can support more than 4 SEs, while the bitmap
6366                          * in the cu_info struct is 4x4 and the ioctl interface
6367                          * struct drm_amdgpu_info_device must remain stable.
6368                          * So the last two columns of the bitmap store the CU
6369                          * masks for SEs 4 to 7; the layout of the bitmap is as below:
6370                          *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
6371                          *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
6372                          *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
6373                          *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
6374                          *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
6375                          *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
6376                          *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
6377                          *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
6378                          */
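                        /*
                         * Worked example (illustrative values only): for SE5/SH1,
                         * i = 5 and j = 1, so i % 4 = 1 and j + (i / 4) * 2 = 3,
                         * i.e. the mask lands in bitmap[0][1][3], matching the
                         * SE5 row above.
                         */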
6379                         cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
6380
6381                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6382                                 if (bitmap & mask)
6383                                         counter++;
6384
6385                                 mask <<= 1;
6386                         }
6387                         active_cu_number += counter;
6388                 }
6389         }
6390         gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6391         mutex_unlock(&adev->grbm_idx_mutex);
6392
6393         cu_info->number = active_cu_number;
6394         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6395
6396         return 0;
6397 }
6398
6399 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
6400 {
6401         .type = AMD_IP_BLOCK_TYPE_GFX,
6402         .major = 11,
6403         .minor = 0,
6404         .rev = 0,
6405         .funcs = &gfx_v11_0_ip_funcs,
6406 };