platform/kernel/linux-starfive.git: drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "amdgpu_atomfirmware.h"
33 #include "imu_v11_0.h"
34 #include "soc21.h"
35 #include "nvd.h"
36
37 #include "gc/gc_11_0_0_offset.h"
38 #include "gc/gc_11_0_0_sh_mask.h"
39 #include "smuio/smuio_13_0_6_offset.h"
40 #include "smuio/smuio_13_0_6_sh_mask.h"
41 #include "navi10_enum.h"
42 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
43
44 #include "soc15.h"
45 #include "soc15d.h"
46 #include "clearstate_gfx11.h"
47 #include "v11_structs.h"
48 #include "gfx_v11_0.h"
49 #include "nbio_v4_3.h"
50 #include "mes_v11_0.h"
51
52 #define GFX11_NUM_GFX_RINGS             1
53 #define GFX11_MEC_HPD_SIZE      2048
54
55 #define RLCG_UCODE_LOADING_START_ADDRESS        0x00002000L
56 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1        0x1388
57
58 #define regCGTT_WD_CLK_CTRL             0x5086
59 #define regCGTT_WD_CLK_CTRL_BASE_IDX    1
60 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1   0x4e7e
61 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX  1
62
63 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
64 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
65 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
66 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
67 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
68 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
69 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
70 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
71 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
72 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
73 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
74 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
75 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
76 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
78 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
79 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
80 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
81 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
82 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
83 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
84
85 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
86 {
87         SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
88         SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
89         SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
90         SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
91         SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
92         SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
93         SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
94         SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
95         SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
96 };
97
98 #define DEFAULT_SH_MEM_CONFIG \
99         ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
100          (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
101          (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
102
103 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
104 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
105 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
106 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
107 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
108 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
109 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
110 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
111                                  struct amdgpu_cu_info *cu_info);
112 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
113 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
114                                    u32 sh_num, u32 instance);
115 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
116
117 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
118 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
119 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
120                                      uint32_t val);
121 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
122 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
123                                            uint16_t pasid, uint32_t flush_type,
124                                            bool all_hub, uint8_t dst_sel);
125 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
126 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
127 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
128                                       bool enable);
129
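/* Emit a SET_RESOURCES packet on the KIQ ring to hand the compute queue mask
 * to the KIQ; the GWS, OAC and GDS fields are left at zero.
 */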
130 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
131 {
132         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
133         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
134                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
135         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
136         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
137         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
138         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
139         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
140         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
141 }
142
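/* Emit a MAP_QUEUES packet on the KIQ ring to map a gfx, compute or MES
 * queue from its MQD; the me and engine_sel fields are chosen from the ring
 * type.
 */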
143 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
144                                  struct amdgpu_ring *ring)
145 {
146         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
147         uint64_t wptr_addr = ring->wptr_gpu_addr;
148         uint32_t me = 0, eng_sel = 0;
149
150         switch (ring->funcs->type) {
151         case AMDGPU_RING_TYPE_COMPUTE:
152                 me = 1;
153                 eng_sel = 0;
154                 break;
155         case AMDGPU_RING_TYPE_GFX:
156                 me = 0;
157                 eng_sel = 4;
158                 break;
159         case AMDGPU_RING_TYPE_MES:
160                 me = 2;
161                 eng_sel = 5;
162                 break;
163         default:
164                 WARN_ON(1);
165         }
166
167         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
168         /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
169         amdgpu_ring_write(kiq_ring,
170                           PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
171                           PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
172                           PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
173                           PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
174                           PACKET3_MAP_QUEUES_ME((me)) |
175                           PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
176                           PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
177                           PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
178                           PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
179         amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
180         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
181         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
182         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
183         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
184 }
185
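/* Unmap (or preempt) a queue through the KIQ. When MES is enabled and the
 * KIQ ring is not ready, the request is routed through the MES instead.
 */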
186 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
187                                    struct amdgpu_ring *ring,
188                                    enum amdgpu_unmap_queues_action action,
189                                    u64 gpu_addr, u64 seq)
190 {
191         struct amdgpu_device *adev = kiq_ring->adev;
192         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
193
194         if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
195                 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
196                 return;
197         }
198
199         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
200         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
201                           PACKET3_UNMAP_QUEUES_ACTION(action) |
202                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
203                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
204                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
205         amdgpu_ring_write(kiq_ring,
206                   PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
207
208         if (action == PREEMPT_QUEUES_NO_UNMAP) {
209                 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
210                 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
211                 amdgpu_ring_write(kiq_ring, seq);
212         } else {
213                 amdgpu_ring_write(kiq_ring, 0);
214                 amdgpu_ring_write(kiq_ring, 0);
215                 amdgpu_ring_write(kiq_ring, 0);
216         }
217 }
218
219 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
220                                    struct amdgpu_ring *ring,
221                                    u64 addr,
222                                    u64 seq)
223 {
224         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
225
226         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
227         amdgpu_ring_write(kiq_ring,
228                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
229                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
230                           PACKET3_QUERY_STATUS_COMMAND(2));
231         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
232                           PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
233                           PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
234         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
235         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
236         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
237         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
238 }
239
240 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
241                                 uint16_t pasid, uint32_t flush_type,
242                                 bool all_hub)
243 {
244         gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
245 }
246
247 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
248         .kiq_set_resources = gfx11_kiq_set_resources,
249         .kiq_map_queues = gfx11_kiq_map_queues,
250         .kiq_unmap_queues = gfx11_kiq_unmap_queues,
251         .kiq_query_status = gfx11_kiq_query_status,
252         .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
253         .set_resources_size = 8,
254         .map_queues_size = 7,
255         .unmap_queues_size = 6,
256         .query_status_size = 7,
257         .invalidate_tlbs_size = 2,
258 };
259
260 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
261 {
262         adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs;
263 }
264
265 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
266 {
267         switch (adev->ip_versions[GC_HWIP][0]) {
268         case IP_VERSION(11, 0, 1):
269         case IP_VERSION(11, 0, 4):
270                 soc15_program_register_sequence(adev,
271                                                 golden_settings_gc_11_0_1,
272                                                 (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
273                 break;
274         default:
275                 break;
276         }
277 }
278
279 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
280                                        bool wc, uint32_t reg, uint32_t val)
281 {
282         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
283         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
284                           WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
285         amdgpu_ring_write(ring, reg);
286         amdgpu_ring_write(ring, 0);
287         amdgpu_ring_write(ring, val);
288 }
289
290 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
291                                   int mem_space, int opt, uint32_t addr0,
292                                   uint32_t addr1, uint32_t ref, uint32_t mask,
293                                   uint32_t inv)
294 {
295         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
296         amdgpu_ring_write(ring,
297                           /* memory (1) or register (0) */
298                           (WAIT_REG_MEM_MEM_SPACE(mem_space) |
299                            WAIT_REG_MEM_OPERATION(opt) | /* wait */
300                            WAIT_REG_MEM_FUNCTION(3) |  /* equal */
301                            WAIT_REG_MEM_ENGINE(eng_sel)));
302
303         if (mem_space)
304                 BUG_ON(addr0 & 0x3); /* Dword align */
305         amdgpu_ring_write(ring, addr0);
306         amdgpu_ring_write(ring, addr1);
307         amdgpu_ring_write(ring, ref);
308         amdgpu_ring_write(ring, mask);
309         amdgpu_ring_write(ring, inv); /* poll interval */
310 }
311
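/* Basic ring test: write 0xCAFEDEAD to SCRATCH_REG0, have the ring overwrite
 * it with 0xDEADBEEF, then poll the register until the value lands or the
 * timeout expires.
 */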
312 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
313 {
314         struct amdgpu_device *adev = ring->adev;
315         uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
316         uint32_t tmp = 0;
317         unsigned i;
318         int r;
319
320         WREG32(scratch, 0xCAFEDEAD);
321         r = amdgpu_ring_alloc(ring, 5);
322         if (r) {
323                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
324                           ring->idx, r);
325                 return r;
326         }
327
328         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
329                 gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
330         } else {
331                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
332                 amdgpu_ring_write(ring, scratch -
333                                   PACKET3_SET_UCONFIG_REG_START);
334                 amdgpu_ring_write(ring, 0xDEADBEEF);
335         }
336         amdgpu_ring_commit(ring);
337
338         for (i = 0; i < adev->usec_timeout; i++) {
339                 tmp = RREG32(scratch);
340                 if (tmp == 0xDEADBEEF)
341                         break;
342                 if (amdgpu_emu_mode == 1)
343                         msleep(1);
344                 else
345                         udelay(1);
346         }
347
348         if (i >= adev->usec_timeout)
349                 r = -ETIMEDOUT;
350         return r;
351 }
352
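/* IB test: submit a small indirect buffer that writes 0xDEADBEEF to a
 * writeback slot (or MES context buffer) and wait for its fence.
 */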
353 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
354 {
355         struct amdgpu_device *adev = ring->adev;
356         struct amdgpu_ib ib;
357         struct dma_fence *f = NULL;
358         unsigned index;
359         uint64_t gpu_addr;
360         volatile uint32_t *cpu_ptr;
361         long r;
362
363         /* MES KIQ firmware doesn't support indirect buffers for now */
364         if (adev->enable_mes_kiq &&
365             ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
366                 return 0;
367
368         memset(&ib, 0, sizeof(ib));
369
370         if (ring->is_mes_queue) {
371                 uint32_t padding, offset;
372
373                 offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
374                 padding = amdgpu_mes_ctx_get_offs(ring,
375                                                   AMDGPU_MES_CTX_PADDING_OFFS);
376
377                 ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
378                 ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
379
380                 gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
381                 cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
382                 *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
383         } else {
384                 r = amdgpu_device_wb_get(adev, &index);
385                 if (r)
386                         return r;
387
388                 gpu_addr = adev->wb.gpu_addr + (index * 4);
389                 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
390                 cpu_ptr = &adev->wb.wb[index];
391
392                 r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
393                 if (r) {
394                         DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
395                         goto err1;
396                 }
397         }
398
399         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
400         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
401         ib.ptr[2] = lower_32_bits(gpu_addr);
402         ib.ptr[3] = upper_32_bits(gpu_addr);
403         ib.ptr[4] = 0xDEADBEEF;
404         ib.length_dw = 5;
405
406         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
407         if (r)
408                 goto err2;
409
410         r = dma_fence_wait_timeout(f, false, timeout);
411         if (r == 0) {
412                 r = -ETIMEDOUT;
413                 goto err2;
414         } else if (r < 0) {
415                 goto err2;
416         }
417
418         if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
419                 r = 0;
420         else
421                 r = -EINVAL;
422 err2:
423         if (!ring->is_mes_queue)
424                 amdgpu_ib_free(adev, &ib, NULL);
425         dma_fence_put(f);
426 err1:
427         if (!ring->is_mes_queue)
428                 amdgpu_device_wb_free(adev, index);
429         return r;
430 }
431
432 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
433 {
434         release_firmware(adev->gfx.pfp_fw);
435         adev->gfx.pfp_fw = NULL;
436         release_firmware(adev->gfx.me_fw);
437         adev->gfx.me_fw = NULL;
438         release_firmware(adev->gfx.rlc_fw);
439         adev->gfx.rlc_fw = NULL;
440         release_firmware(adev->gfx.mec_fw);
441         adev->gfx.mec_fw = NULL;
442
443         kfree(adev->gfx.rlc.register_list_format);
444 }
445
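/* Request and validate the PFP, ME, RLC and MEC firmware images; the PFP
 * header version decides whether the RS64 CP microcode layout is used.
 */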
446 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
447 {
448         char fw_name[40];
449         char ucode_prefix[30];
450         int err;
451         const struct rlc_firmware_header_v2_0 *rlc_hdr;
452         uint16_t version_major;
453         uint16_t version_minor;
454
455         DRM_DEBUG("\n");
456
457         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
458
459         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
460         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
461         if (err)
462                 goto out;
463         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
464         if (err)
465                 goto out;
466         /* check the pfp fw header version to decide whether to enable rs64 for gfx11 */
467         adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
468                                 (union amdgpu_firmware_header *)
469                                 adev->gfx.pfp_fw->data, 2, 0);
470         if (adev->gfx.rs64_enable) {
471                 dev_info(adev->dev, "CP RS64 enable\n");
472                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
473                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
474                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
475         } else {
476                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
477         }
478
479         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
480         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
481         if (err)
482                 goto out;
483         err = amdgpu_ucode_validate(adev->gfx.me_fw);
484         if (err)
485                 goto out;
486         if (adev->gfx.rs64_enable) {
487                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
488                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
489                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
490         } else {
491                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
492         }
493
494         if (!amdgpu_sriov_vf(adev)) {
495                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
496                 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
497                 if (err)
498                         goto out;
499                 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
500                 if (err)
501                         goto out;
502                 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
503                 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
504                 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
505                 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
506                 if (err)
507                         goto out;
508         }
509
510         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
511         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
512         if (err)
513                 goto out;
514         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
515         if (err)
516                 goto out;
517         if (adev->gfx.rs64_enable) {
518                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
519                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
520                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
521                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
522                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
523         } else {
524                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
525                 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
526         }
527
528         /* only one MEC for gfx 11.0.0. */
529         adev->gfx.mec2_fw = NULL;
530
531 out:
532         if (err) {
533                 dev_err(adev->dev,
534                         "gfx11: Failed to init firmware \"%s\"\n",
535                         fw_name);
536                 release_firmware(adev->gfx.pfp_fw);
537                 adev->gfx.pfp_fw = NULL;
538                 release_firmware(adev->gfx.me_fw);
539                 adev->gfx.me_fw = NULL;
540                 release_firmware(adev->gfx.rlc_fw);
541                 adev->gfx.rlc_fw = NULL;
542                 release_firmware(adev->gfx.mec_fw);
543                 adev->gfx.mec_fw = NULL;
544         }
545
546         return err;
547 }
548
549 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev)
550 {
551         const struct psp_firmware_header_v1_0 *toc_hdr;
552         int err = 0;
553         char fw_name[40];
554         char ucode_prefix[30];
555
556         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
557
558         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
559         err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
560         if (err)
561                 goto out;
562
563         err = amdgpu_ucode_validate(adev->psp.toc_fw);
564         if (err)
565                 goto out;
566
567         toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
568         adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
569         adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
570         adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
571         adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
572                                 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
573         return 0;
574 out:
575         dev_err(adev->dev, "Failed to load TOC microcode\n");
576         release_firmware(adev->psp.toc_fw);
577         adev->psp.toc_fw = NULL;
578         return err;
579 }
580
581 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
582 {
583         u32 count = 0;
584         const struct cs_section_def *sect = NULL;
585         const struct cs_extent_def *ext = NULL;
586
587         /* begin clear state */
588         count += 2;
589         /* context control state */
590         count += 3;
591
592         for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
593                 for (ext = sect->section; ext->extent != NULL; ++ext) {
594                         if (sect->id == SECT_CONTEXT)
595                                 count += 2 + ext->reg_count;
596                         else
597                                 return 0;
598                 }
599         }
600
601         /* set PA_SC_TILE_STEERING_OVERRIDE */
602         count += 3;
603         /* end clear state */
604         count += 2;
605         /* clear state */
606         count += 2;
607
608         return count;
609 }
610
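/* Build the clear-state buffer from the cs_data tables: preamble, context
 * control, the SECT_CONTEXT register writes, PA_SC_TILE_STEERING_OVERRIDE
 * and the trailing CLEAR_STATE packet.
 */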
611 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
612                                     volatile u32 *buffer)
613 {
614         u32 count = 0, i;
615         const struct cs_section_def *sect = NULL;
616         const struct cs_extent_def *ext = NULL;
617         int ctx_reg_offset;
618
619         if (adev->gfx.rlc.cs_data == NULL)
620                 return;
621         if (buffer == NULL)
622                 return;
623
624         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
625         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
626
627         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
628         buffer[count++] = cpu_to_le32(0x80000000);
629         buffer[count++] = cpu_to_le32(0x80000000);
630
631         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
632                 for (ext = sect->section; ext->extent != NULL; ++ext) {
633                         if (sect->id == SECT_CONTEXT) {
634                                 buffer[count++] =
635                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
636                                 buffer[count++] = cpu_to_le32(ext->reg_index -
637                                                 PACKET3_SET_CONTEXT_REG_START);
638                                 for (i = 0; i < ext->reg_count; i++)
639                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
640                         } else {
641                                 return;
642                         }
643                 }
644         }
645
646         ctx_reg_offset =
647                 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
648         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
649         buffer[count++] = cpu_to_le32(ctx_reg_offset);
650         buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
651
652         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
653         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
654
655         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
656         buffer[count++] = cpu_to_le32(0);
657 }
658
659 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
660 {
661         /* clear state block */
662         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
663                         &adev->gfx.rlc.clear_state_gpu_addr,
664                         (void **)&adev->gfx.rlc.cs_ptr);
665
666         /* jump table block */
667         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
668                         &adev->gfx.rlc.cp_table_gpu_addr,
669                         (void **)&adev->gfx.rlc.cp_table_ptr);
670 }
671
672 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
673 {
674         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
675
676         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
677         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
678         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
679         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
680         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
681         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
682         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
683         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
684         adev->gfx.rlc.rlcg_reg_access_supported = true;
685 }
686
687 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
688 {
689         const struct cs_section_def *cs_data;
690         int r;
691
692         adev->gfx.rlc.cs_data = gfx11_cs_data;
693
694         cs_data = adev->gfx.rlc.cs_data;
695
696         if (cs_data) {
697                 /* init clear state block */
698                 r = amdgpu_gfx_rlc_init_csb(adev);
699                 if (r)
700                         return r;
701         }
702
703         /* init spm vmid with 0xf */
704         if (adev->gfx.rlc.funcs->update_spm_vmid)
705                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
706
707         return 0;
708 }
709
710 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
711 {
712         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
713         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
714         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
715 }
716
717 static int gfx_v11_0_me_init(struct amdgpu_device *adev)
718 {
719         int r;
720
721         bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
722
723         amdgpu_gfx_graphics_queue_acquire(adev);
724
725         r = gfx_v11_0_init_microcode(adev);
726         if (r)
727                 DRM_ERROR("Failed to load gfx firmware!\n");
728
729         return r;
730 }
731
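/* Acquire the compute queues and allocate the HPD EOP buffer used by the
 * MEC, one GFX11_MEC_HPD_SIZE slot per compute ring.
 */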
732 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
733 {
734         int r;
735         u32 *hpd;
736         size_t mec_hpd_size;
737
738         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
739
740         /* take ownership of the relevant compute queues */
741         amdgpu_gfx_compute_queue_acquire(adev);
742         mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
743
744         if (mec_hpd_size) {
745                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
746                                               AMDGPU_GEM_DOMAIN_GTT,
747                                               &adev->gfx.mec.hpd_eop_obj,
748                                               &adev->gfx.mec.hpd_eop_gpu_addr,
749                                               (void **)&hpd);
750                 if (r) {
751                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
752                         gfx_v11_0_mec_fini(adev);
753                         return r;
754                 }
755
756                 memset(hpd, 0, mec_hpd_size);
757
758                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
759                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
760         }
761
762         return 0;
763 }
764
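/* Read a single wave register through the SQ indirect index/data pair. */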
765 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
766 {
767         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
768                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
769                 (address << SQ_IND_INDEX__INDEX__SHIFT));
770         return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
771 }
772
773 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
774                            uint32_t thread, uint32_t regno,
775                            uint32_t num, uint32_t *out)
776 {
777         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
778                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
779                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
780                 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
781                 (SQ_IND_INDEX__AUTO_INCR_MASK));
782         while (num--)
783                 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
784 }
785
786 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
787 {
788         /* in gfx11 the SIMD_ID is specified as part of the INSTANCE
789          * field when performing a select_se_sh so it should be
790          * zero here */
791         WARN_ON(simd != 0);
792
793         /* type 3 wave data */
794         dst[(*no_fields)++] = 3;
795         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
796         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
797         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
798         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
799         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
800         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
801         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
802         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
803         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
804         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
805         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
806         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
807         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
808         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
809         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
810 }
811
812 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
813                                      uint32_t wave, uint32_t start,
814                                      uint32_t size, uint32_t *dst)
815 {
816         WARN_ON(simd != 0);
817
818         wave_read_regs(
819                 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
820                 dst);
821 }
822
823 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
824                                       uint32_t wave, uint32_t thread,
825                                       uint32_t start, uint32_t size,
826                                       uint32_t *dst)
827 {
828         wave_read_regs(
829                 adev, wave, thread,
830                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
831 }
832
833 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
834                                        u32 me, u32 pipe, u32 q, u32 vm)
835 {
836         soc21_grbm_select(adev, me, pipe, q, vm);
837 }
838
839 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
840         .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
841         .select_se_sh = &gfx_v11_0_select_se_sh,
842         .read_wave_data = &gfx_v11_0_read_wave_data,
843         .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
844         .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
845         .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
846         .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
847 };
848
849 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
850 {
851         adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
852
853         switch (adev->ip_versions[GC_HWIP][0]) {
854         case IP_VERSION(11, 0, 0):
855         case IP_VERSION(11, 0, 2):
856         case IP_VERSION(11, 0, 3):
857                 adev->gfx.config.max_hw_contexts = 8;
858                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
859                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
860                 adev->gfx.config.sc_hiz_tile_fifo_size = 0;
861                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
862                 break;
863         case IP_VERSION(11, 0, 1):
864         case IP_VERSION(11, 0, 4):
865                 adev->gfx.config.max_hw_contexts = 8;
866                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
867                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
868                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
869                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
870                 break;
871         default:
872                 BUG();
873                 break;
874         }
875
876         return 0;
877 }
878
879 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
880                                    int me, int pipe, int queue)
881 {
882         int r;
883         struct amdgpu_ring *ring;
884         unsigned int irq_type;
885
886         ring = &adev->gfx.gfx_ring[ring_id];
887
888         ring->me = me;
889         ring->pipe = pipe;
890         ring->queue = queue;
891
892         ring->ring_obj = NULL;
893         ring->use_doorbell = true;
894
895         if (!ring_id)
896                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
897         else
898                 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
899         sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
900
901         irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
902         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
903                              AMDGPU_RING_PRIO_DEFAULT, NULL);
904         if (r)
905                 return r;
906         return 0;
907 }
908
909 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
910                                        int mec, int pipe, int queue)
911 {
912         int r;
913         unsigned irq_type;
914         struct amdgpu_ring *ring;
915         unsigned int hw_prio;
916
917         ring = &adev->gfx.compute_ring[ring_id];
918
919         /* mec0 is me1 */
920         ring->me = mec + 1;
921         ring->pipe = pipe;
922         ring->queue = queue;
923
924         ring->ring_obj = NULL;
925         ring->use_doorbell = true;
926         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
927         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
928                                 + (ring_id * GFX11_MEC_HPD_SIZE);
929         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
930
931         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
932                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
933                 + ring->pipe;
934         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
935                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
936         /* type-2 packets are deprecated on MEC, use type-3 instead */
937         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
938                              hw_prio, NULL);
939         if (r)
940                 return r;
941
942         return 0;
943 }
944
945 static struct {
946         SOC21_FIRMWARE_ID       id;
947         unsigned int            offset;
948         unsigned int            size;
949 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
950
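/* Walk the RLC table of contents and record each firmware's offset and size
 * (converted from dwords to bytes) in rlc_autoload_info.
 */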
951 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
952 {
953         RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
954
955         while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
956                         (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
957                 rlc_autoload_info[ucode->id].id = ucode->id;
958                 rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
959                 rlc_autoload_info[ucode->id].size = ucode->size * 4;
960
961                 ucode++;
962         }
963 }
964
965 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
966 {
967         uint32_t total_size = 0;
968         SOC21_FIRMWARE_ID id;
969
970         gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
971
972         for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
973                 total_size += rlc_autoload_info[id].size;
974
975         /* In case the offsets in the RLC TOC are padded for alignment, take the end of the last entry instead */
976         if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
977                 total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
978                         rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
979
980         return total_size;
981 }
982
983 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
984 {
985         int r;
986         uint32_t total_size;
987
988         total_size = gfx_v11_0_calc_toc_total_size(adev);
989
990         r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
991                         AMDGPU_GEM_DOMAIN_VRAM,
992                         &adev->gfx.rlc.rlc_autoload_bo,
993                         &adev->gfx.rlc.rlc_autoload_gpu_addr,
994                         (void **)&adev->gfx.rlc.rlc_autoload_ptr);
995
996         if (r) {
997                 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
998                 return r;
999         }
1000
1001         return 0;
1002 }
1003
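/* Copy one firmware image into the RLC autoload buffer at its TOC offset,
 * zero-pad it up to the TOC size and set the firmware's bit in the autoload
 * mask (the RS64 PFP/ME images are excluded from the mask).
 */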
1004 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1005                                               SOC21_FIRMWARE_ID id,
1006                                               const void *fw_data,
1007                                               uint32_t fw_size,
1008                                               uint32_t *fw_autoload_mask)
1009 {
1010         uint32_t toc_offset;
1011         uint32_t toc_fw_size;
1012         char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1013
1014         if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
1015                 return;
1016
1017         toc_offset = rlc_autoload_info[id].offset;
1018         toc_fw_size = rlc_autoload_info[id].size;
1019
1020         if (fw_size == 0)
1021                 fw_size = toc_fw_size;
1022
1023         if (fw_size > toc_fw_size)
1024                 fw_size = toc_fw_size;
1025
1026         memcpy(ptr + toc_offset, fw_data, fw_size);
1027
1028         if (fw_size < toc_fw_size)
1029                 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1030
1031         if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
1032                 *(uint64_t *)fw_autoload_mask |= 1ULL << id;
1033 }
1034
1035 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
1036                                                         uint32_t *fw_autoload_mask)
1037 {
1038         void *data;
1039         uint32_t size;
1040         uint64_t *toc_ptr;
1041
1042         *(uint64_t *)fw_autoload_mask |= 0x1;
1043
1044         DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
1045
1046         data = adev->psp.toc.start_addr;
1047         size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
1048
1049         toc_ptr = (uint64_t *)data + size / 8 - 1;
1050         *toc_ptr = *(uint64_t *)fw_autoload_mask;
1051
1052         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
1053                                         data, size, fw_autoload_mask);
1054 }
1055
1056 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1057                                                         uint32_t *fw_autoload_mask)
1058 {
1059         const __le32 *fw_data;
1060         uint32_t fw_size;
1061         const struct gfx_firmware_header_v1_0 *cp_hdr;
1062         const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1063         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1064         const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1065         uint16_t version_major, version_minor;
1066
1067         if (adev->gfx.rs64_enable) {
1068                 /* pfp ucode */
1069                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1070                         adev->gfx.pfp_fw->data;
1071                 /* instruction */
1072                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1073                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1074                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1075                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1076                                                 fw_data, fw_size, fw_autoload_mask);
1077                 /* data */
1078                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1079                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
1080                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1081                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1082                                                 fw_data, fw_size, fw_autoload_mask);
1083                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1084                                                 fw_data, fw_size, fw_autoload_mask);
1085                 /* me ucode */
1086                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1087                         adev->gfx.me_fw->data;
1088                 /* instruction */
1089                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1090                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1091                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1092                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1093                                                 fw_data, fw_size, fw_autoload_mask);
1094                 /* data */
1095                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1096                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
1097                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1098                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1099                                                 fw_data, fw_size, fw_autoload_mask);
1100                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1101                                                 fw_data, fw_size, fw_autoload_mask);
1102                 /* mec ucode */
1103                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1104                         adev->gfx.mec_fw->data;
1105                 /* instruction */
1106                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1107                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1108                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1109                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1110                                                 fw_data, fw_size, fw_autoload_mask);
1111                 /* data */
1112                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1113                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
1114                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1115                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1116                                                 fw_data, fw_size, fw_autoload_mask);
1117                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1118                                                 fw_data, fw_size, fw_autoload_mask);
1119                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1120                                                 fw_data, fw_size, fw_autoload_mask);
1121                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1122                                                 fw_data, fw_size, fw_autoload_mask);
1123         } else {
1124                 /* pfp ucode */
1125                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1126                         adev->gfx.pfp_fw->data;
1127                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1128                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1129                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1130                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1131                                                 fw_data, fw_size, fw_autoload_mask);
1132
1133                 /* me ucode */
1134                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1135                         adev->gfx.me_fw->data;
1136                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1137                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1138                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1139                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1140                                                 fw_data, fw_size, fw_autoload_mask);
1141
1142                 /* mec ucode */
1143                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1144                         adev->gfx.mec_fw->data;
1145                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1146                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1147                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1148                         cp_hdr->jt_size * 4;
1149                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1150                                                 fw_data, fw_size, fw_autoload_mask);
1151         }
1152
1153         /* rlc ucode */
1154         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1155                 adev->gfx.rlc_fw->data;
1156         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1157                         le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1158         fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1159         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1160                                         fw_data, fw_size, fw_autoload_mask);
1161
1162         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1163         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1164         if (version_major == 2) {
1165                 if (version_minor >= 2) {
1166                         rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1167
1168                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1169                                         le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1170                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1171                         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1172                                         fw_data, fw_size, fw_autoload_mask);
1173
1174                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1175                                         le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1176                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1177                         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1178                                         fw_data, fw_size, fw_autoload_mask);
1179                 }
1180         }
1181 }
1182
1183 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1184                                                         uint32_t *fw_autoload_mask)
1185 {
1186         const __le32 *fw_data;
1187         uint32_t fw_size;
1188         const struct sdma_firmware_header_v2_0 *sdma_hdr;
1189
1190         sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1191                 adev->sdma.instance[0].fw->data;
1192         fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1193                         le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1194         fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1195
1196         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1197                         SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1198
1199         fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1200                         le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1201         fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1202
1203         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1204                         SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1205 }
1206
1207 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1208                                                         uint32_t *fw_autoload_mask)
1209 {
1210         const __le32 *fw_data;
1211         unsigned fw_size;
1212         const struct mes_firmware_header_v1_0 *mes_hdr;
1213         int pipe, ucode_id, data_id;
1214
1215         for (pipe = 0; pipe < 2; pipe++) {
1216                 if (pipe == 0) {
1217                         ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1218                         data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1219                 } else {
1220                         ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1221                         data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1222                 }
1223
1224                 mes_hdr = (const struct mes_firmware_header_v1_0 *)
1225                         adev->mes.fw[pipe]->data;
1226
1227                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1228                                 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1229                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1230
1231                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1232                                 ucode_id, fw_data, fw_size, fw_autoload_mask);
1233
1234                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1235                                 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1236                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1237
1238                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1239                                 data_id, fw_data, fw_size, fw_autoload_mask);
1240         }
1241 }
1242
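     /*
      * RLC backdoor autoload: stage the SDMA, gfx, MES and TOC firmwares into
      * the autoload buffer, program the RLC_G image location and size into the
      * GFX IMU bootloader registers, then load, set up and start the IMU
      * before disabling GPA mode.
      */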
1243 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1244 {
1245         uint32_t rlc_g_offset, rlc_g_size;
1246         uint64_t gpu_addr;
1247         uint32_t autoload_fw_id[2];
1248
1249         memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);
1250
1251         /* RLC autoload sequence 2: copy ucode */
1252         gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1253         gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1254         gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1255         gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1256
1257         rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1258         rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1259         gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1260
1261         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1262         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1263
1264         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1265
1266         /* RLC autoload sequence 3: load IMU fw */
1267         if (adev->gfx.imu.funcs->load_microcode)
1268                 adev->gfx.imu.funcs->load_microcode(adev);
1269         /* RLC autoload sequence 4 init IMU fw */
1270         if (adev->gfx.imu.funcs->setup_imu)
1271                 adev->gfx.imu.funcs->setup_imu(adev);
1272         if (adev->gfx.imu.funcs->start_imu)
1273                 adev->gfx.imu.funcs->start_imu(adev);
1274
1275         /* RLC autoload sequence 5 disable gpa mode */
1276         gfx_v11_0_disable_gpa_mode(adev);
1277
1278         return 0;
1279 }
1280
1281 static int gfx_v11_0_sw_init(void *handle)
1282 {
1283         int i, j, k, r, ring_id = 0;
1284         struct amdgpu_kiq *kiq;
1285         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1286
1287         adev->gfxhub.funcs->init(adev);
1288
1289         switch (adev->ip_versions[GC_HWIP][0]) {
1290         case IP_VERSION(11, 0, 0):
1291         case IP_VERSION(11, 0, 2):
1292         case IP_VERSION(11, 0, 3):
1293                 adev->gfx.me.num_me = 1;
1294                 adev->gfx.me.num_pipe_per_me = 1;
1295                 adev->gfx.me.num_queue_per_pipe = 1;
1296                 adev->gfx.mec.num_mec = 2;
1297                 adev->gfx.mec.num_pipe_per_mec = 4;
1298                 adev->gfx.mec.num_queue_per_pipe = 4;
1299                 break;
1300         case IP_VERSION(11, 0, 1):
1301         case IP_VERSION(11, 0, 4):
1302                 adev->gfx.me.num_me = 1;
1303                 adev->gfx.me.num_pipe_per_me = 1;
1304                 adev->gfx.me.num_queue_per_pipe = 1;
1305                 adev->gfx.mec.num_mec = 1;
1306                 adev->gfx.mec.num_pipe_per_mec = 4;
1307                 adev->gfx.mec.num_queue_per_pipe = 4;
1308                 break;
1309         default:
1310                 adev->gfx.me.num_me = 1;
1311                 adev->gfx.me.num_pipe_per_me = 1;
1312                 adev->gfx.me.num_queue_per_pipe = 1;
1313                 adev->gfx.mec.num_mec = 1;
1314                 adev->gfx.mec.num_pipe_per_mec = 4;
1315                 adev->gfx.mec.num_queue_per_pipe = 8;
1316                 break;
1317         }
1318
1319         /* EOP Event */
1320         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1321                               GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1322                               &adev->gfx.eop_irq);
1323         if (r)
1324                 return r;
1325
1326         /* Privileged reg */
1327         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1328                               GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1329                               &adev->gfx.priv_reg_irq);
1330         if (r)
1331                 return r;
1332
1333         /* Privileged inst */
1334         r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1335                               GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1336                               &adev->gfx.priv_inst_irq);
1337         if (r)
1338                 return r;
1339
1340         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1341
1342         if (adev->gfx.imu.funcs) {
1343                 if (adev->gfx.imu.funcs->init_microcode) {
1344                         r = adev->gfx.imu.funcs->init_microcode(adev);
1345                         if (r)
1346                                 DRM_ERROR("Failed to load imu firmware!\n");
1347                 }
1348         }
1349
1350         r = gfx_v11_0_me_init(adev);
1351         if (r)
1352                 return r;
1353
1354         r = gfx_v11_0_rlc_init(adev);
1355         if (r) {
1356                 DRM_ERROR("Failed to init rlc BOs!\n");
1357                 return r;
1358         }
1359
1360         r = gfx_v11_0_mec_init(adev);
1361         if (r) {
1362                 DRM_ERROR("Failed to init MEC BOs!\n");
1363                 return r;
1364         }
1365
1366         /* set up the gfx ring */
1367         for (i = 0; i < adev->gfx.me.num_me; i++) {
1368                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1369                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1370                                 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1371                                         continue;
1372
1373                                 r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1374                                                             i, k, j);
1375                                 if (r)
1376                                         return r;
1377                                 ring_id++;
1378                         }
1379                 }
1380         }
1381
1382         ring_id = 0;
1383         /* set up the compute queues - allocate horizontally across pipes */
1384         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1385                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1386                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1387                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
1388                                                                      j))
1389                                         continue;
1390
1391                                 r = gfx_v11_0_compute_ring_init(adev, ring_id,
1392                                                                 i, k, j);
1393                                 if (r)
1394                                         return r;
1395
1396                                 ring_id++;
1397                         }
1398                 }
1399         }
1400
1401         if (!adev->enable_mes_kiq) {
1402                 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE);
1403                 if (r) {
1404                         DRM_ERROR("Failed to init KIQ BOs!\n");
1405                         return r;
1406                 }
1407
1408                 kiq = &adev->gfx.kiq;
1409                 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1410                 if (r)
1411                         return r;
1412         }
1413
1414         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd));
1415         if (r)
1416                 return r;
1417
1418         /* allocate visible FB for rlc auto-loading fw */
1419         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1420                 r = gfx_v11_0_init_toc_microcode(adev);
1421                 if (r)
1422                         dev_err(adev->dev, "Failed to load toc firmware!\n");
1423                 r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1424                 if (r)
1425                         return r;
1426         }
1427
1428         r = gfx_v11_0_gpu_early_init(adev);
1429         if (r)
1430                 return r;
1431
1432         return 0;
1433 }
1434
1435 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1436 {
1437         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1438                               &adev->gfx.pfp.pfp_fw_gpu_addr,
1439                               (void **)&adev->gfx.pfp.pfp_fw_ptr);
1440
1441         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1442                               &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1443                               (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1444 }
1445
1446 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1447 {
1448         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1449                               &adev->gfx.me.me_fw_gpu_addr,
1450                               (void **)&adev->gfx.me.me_fw_ptr);
1451
1452         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1453                                &adev->gfx.me.me_fw_data_gpu_addr,
1454                                (void **)&adev->gfx.me.me_fw_data_ptr);
1455 }
1456
1457 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1458 {
1459         amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1460                         &adev->gfx.rlc.rlc_autoload_gpu_addr,
1461                         (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1462 }
1463
1464 static int gfx_v11_0_sw_fini(void *handle)
1465 {
1466         int i;
1467         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1468
1469         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1470                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1471         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1472                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1473
1474         amdgpu_gfx_mqd_sw_fini(adev);
1475
1476         if (!adev->enable_mes_kiq) {
1477                 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
1478                 amdgpu_gfx_kiq_fini(adev);
1479         }
1480
1481         gfx_v11_0_pfp_fini(adev);
1482         gfx_v11_0_me_fini(adev);
1483         gfx_v11_0_rlc_fini(adev);
1484         gfx_v11_0_mec_fini(adev);
1485
1486         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1487                 gfx_v11_0_rlc_autoload_buffer_fini(adev);
1488
1489         gfx_v11_0_free_microcode(adev);
1490
1491         return 0;
1492 }
1493
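     /*
      * Program GRBM_GFX_INDEX to target a specific SE/SA/instance for the
      * following register accesses; a value of 0xffffffff selects broadcast
      * writes for that field.
      */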
1494 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1495                                    u32 sh_num, u32 instance)
1496 {
1497         u32 data;
1498
1499         if (instance == 0xffffffff)
1500                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1501                                      INSTANCE_BROADCAST_WRITES, 1);
1502         else
1503                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1504                                      instance);
1505
1506         if (se_num == 0xffffffff)
1507                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1508                                      1);
1509         else
1510                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1511
1512         if (sh_num == 0xffffffff)
1513                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1514                                      1);
1515         else
1516                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1517
1518         WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1519 }
1520
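     /*
      * Return the bitmap of active render backends for the currently selected
      * SE/SA, derived from the fused and user-configured backend disable masks.
      */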
1521 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1522 {
1523         u32 data, mask;
1524
1525         data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1526         data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1527
1528         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1529         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1530
1531         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1532                                          adev->gfx.config.max_sh_per_se);
1533
1534         return (~data) & mask;
1535 }
1536
1537 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
1538 {
1539         int i, j;
1540         u32 data;
1541         u32 active_rbs = 0;
1542         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1543                                         adev->gfx.config.max_sh_per_se;
1544
1545         mutex_lock(&adev->grbm_idx_mutex);
1546         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1547                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1548                         gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
1549                         data = gfx_v11_0_get_rb_active_bitmap(adev);
1550                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1551                                                rb_bitmap_width_per_sh);
1552                 }
1553         }
1554         gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1555         mutex_unlock(&adev->grbm_idx_mutex);
1556
1557         adev->gfx.config.backend_enable_mask = active_rbs;
1558         adev->gfx.config.num_rbs = hweight32(active_rbs);
1559 }
1560
1561 #define DEFAULT_SH_MEM_BASES    (0x6000)
1562 #define LDS_APP_BASE           0x1
1563 #define SCRATCH_APP_BASE       0x2
1564
1565 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
1566 {
1567         int i;
1568         uint32_t sh_mem_bases;
1569         uint32_t data;
1570
1571         /*
1572          * Configure apertures:
1573          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1574          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1575          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1576          */
1577         sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1578                         SCRATCH_APP_BASE;
1579
1580         mutex_lock(&adev->srbm_mutex);
1581         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1582                 soc21_grbm_select(adev, 0, 0, 0, i);
1583                 /* CP and shaders */
1584                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1585                 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1586
1587                 /* Enable trap for each kfd vmid. */
1588                 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1589         data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
             WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
1590         }
1591         soc21_grbm_select(adev, 0, 0, 0, 0);
1592         mutex_unlock(&adev->srbm_mutex);
1593
1594         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
1595          * access. These should be enabled by FW for target VMIDs. */
1596         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1597                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
1598                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
1599                 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
1600                 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
1601         }
1602 }
1603
1604 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
1605 {
1606         int vmid;
1607
1608         /*
1609          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
1610          * access. Compute VMIDs should be enabled by FW for target VMIDs,
1611          * the driver can enable them for graphics. VMID0 should maintain
1612          * access so that HWS firmware can save/restore entries.
1613          */
1614         for (vmid = 1; vmid < 16; vmid++) {
1615                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
1616                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
1617                 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
1618                 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
1619         }
1620 }
1621
1622 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
1623 {
1624         /* TODO: harvest feature to be added later. */
1625 }
1626
1627 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
1628 {
1629         /* TCCs are global (not instanced). */
1630         uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
1631                                RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
1632
1633         adev->gfx.config.tcc_disabled_mask =
1634                 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
1635                 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
1636 }
1637
1638 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
1639 {
1640         u32 tmp;
1641         int i;
1642
1643         WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1644
1645         gfx_v11_0_setup_rb(adev);
1646         gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
1647         gfx_v11_0_get_tcc_info(adev);
1648         adev->gfx.config.pa_sc_tile_steering_override = 0;
1649
1650         /* XXX SH_MEM regs */
1651         /* where to put LDS, scratch, GPUVM in FSA64 space */
1652         mutex_lock(&adev->srbm_mutex);
1653         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
1654                 soc21_grbm_select(adev, 0, 0, 0, i);
1655                 /* CP and shaders */
1656                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1657                 if (i != 0) {
1658                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1659                                 (adev->gmc.private_aperture_start >> 48));
1660                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1661                                 (adev->gmc.shared_aperture_start >> 48));
1662                         WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
1663                 }
1664         }
1665         soc21_grbm_select(adev, 0, 0, 0, 0);
1666
1667         mutex_unlock(&adev->srbm_mutex);
1668
1669         gfx_v11_0_init_compute_vmid(adev);
1670         gfx_v11_0_init_gds_vmid(adev);
1671 }
1672
1673 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1674                                                bool enable)
1675 {
1676         u32 tmp;
1677
1678         if (amdgpu_sriov_vf(adev))
1679                 return;
1680
1681         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
1682
1683         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1684                             enable ? 1 : 0);
1685         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1686                             enable ? 1 : 0);
1687         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1688                             enable ? 1 : 0);
1689         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1690                             enable ? 1 : 0);
1691
1692         WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
1693 }
1694
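     /* Program the clear state buffer address and size into the RLC CSIB registers. */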
1695 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
1696 {
1697         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1698
1699         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
1700                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
1701         WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
1702                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1703         WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1704
1705         return 0;
1706 }
1707
1708 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
1709 {
1710         u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
1711
1712         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1713         WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
1714 }
1715
1716 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
1717 {
1718         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1719         udelay(50);
1720         WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1721         udelay(50);
1722 }
1723
1724 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1725                                              bool enable)
1726 {
1727         uint32_t rlc_pg_cntl;
1728
1729         rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
1730
1731         if (!enable) {
1732                 /* RLC_PG_CNTL[23] = 0 (default)
1733                  * RLC will wait for handshake acks with SMU
1734                  * GFXOFF will be enabled
1735                  * RLC_PG_CNTL[23] = 1
1736                  * RLC will not issue any message to SMU
1737                  * hence no handshake between SMU & RLC
1738                  * GFXOFF will be disabled
1739                  */
1740                 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1741         } else
1742                 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1743         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
1744 }
1745
1746 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
1747 {
1748         /* TODO: re-enable the rlc & smu handshake once the smu
1749          * and gfxoff features work as expected */
1750         if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1751                 gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
1752
1753         WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1754         udelay(50);
1755 }
1756
1757 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
1758 {
1759         uint32_t tmp;
1760
1761         /* enable Save Restore Machine */
1762         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
1763         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1764         tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1765         WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
1766 }
1767
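     /* Write the RLC-G ucode into GPM memory via the indirect ADDR/DATA registers. */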
1768 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
1769 {
1770         const struct rlc_firmware_header_v2_0 *hdr;
1771         const __le32 *fw_data;
1772         unsigned i, fw_size;
1773
1774         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1775         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1776                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1777         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1778
1779         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
1780                      RLCG_UCODE_LOADING_START_ADDRESS);
1781
1782         for (i = 0; i < fw_size; i++)
1783                 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
1784                              le32_to_cpup(fw_data++));
1785
1786         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
1787 }
1788
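     /*
      * Load the RLC LX6 IRAM and DRAM images through the indirect ADDR/DATA
      * registers, then release the LX6 core from reset with PDEBUG enabled.
      */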
1789 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
1790 {
1791         const struct rlc_firmware_header_v2_2 *hdr;
1792         const __le32 *fw_data;
1793         unsigned i, fw_size;
1794         u32 tmp;
1795
1796         hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1797
1798         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1799                         le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1800         fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1801
1802         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
1803
1804         for (i = 0; i < fw_size; i++) {
1805                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1806                         msleep(1);
1807                 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
1808                                 le32_to_cpup(fw_data++));
1809         }
1810
1811         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1812
1813         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1814                         le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1815         fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1816
1817         WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
1818         for (i = 0; i < fw_size; i++) {
1819                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1820                         msleep(1);
1821                 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
1822                                 le32_to_cpup(fw_data++));
1823         }
1824
1825         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1826
1827         tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
1828         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1829         tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1830         WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
1831 }
1832
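     /*
      * Load the RLC-P (pace) and RLC-V (GPU IOV) ucode images, enable GPM
      * thread 1 and enable the GPU IOV F32 core.
      */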
1833 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
1834 {
1835         const struct rlc_firmware_header_v2_3 *hdr;
1836         const __le32 *fw_data;
1837         unsigned i, fw_size;
1838         u32 tmp;
1839
1840         hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
1841
1842         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1843                         le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
1844         fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
1845
1846         WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
1847
1848         for (i = 0; i < fw_size; i++) {
1849                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1850                         msleep(1);
1851                 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
1852                                 le32_to_cpup(fw_data++));
1853         }
1854
1855         WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
1856
1857         tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
1858         tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
1859         WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
1860
1861         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1862                         le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
1863         fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
1864
1865         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
1866
1867         for (i = 0; i < fw_size; i++) {
1868                 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1869                         msleep(1);
1870                 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
1871                                 le32_to_cpup(fw_data++));
1872         }
1873
1874         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
1875
1876         tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
1877         tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
1878         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
1879 }
1880
1881 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
1882 {
1883         const struct rlc_firmware_header_v2_0 *hdr;
1884         uint16_t version_major;
1885         uint16_t version_minor;
1886
1887         if (!adev->gfx.rlc_fw)
1888                 return -EINVAL;
1889
1890         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1891         amdgpu_ucode_print_rlc_hdr(&hdr->header);
1892
1893         version_major = le16_to_cpu(hdr->header.header_version_major);
1894         version_minor = le16_to_cpu(hdr->header.header_version_minor);
1895
1896         if (version_major == 2) {
1897                 gfx_v11_0_load_rlcg_microcode(adev);
1898                 if (amdgpu_dpm == 1) {
1899                         if (version_minor >= 2)
1900                                 gfx_v11_0_load_rlc_iram_dram_microcode(adev);
1901                         if (version_minor == 3)
1902                                 gfx_v11_0_load_rlcp_rlcv_microcode(adev);
1903                 }
1904
1905                 return 0;
1906         }
1907
1908         return -EINVAL;
1909 }
1910
1911 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
1912 {
1913         int r;
1914
1915         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1916                 gfx_v11_0_init_csb(adev);
1917
1918                 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
1919                         gfx_v11_0_rlc_enable_srm(adev);
1920         } else {
1921                 if (amdgpu_sriov_vf(adev)) {
1922                         gfx_v11_0_init_csb(adev);
1923                         return 0;
1924                 }
1925
1926                 adev->gfx.rlc.funcs->stop(adev);
1927
1928                 /* disable CG */
1929                 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
1930
1931                 /* disable PG */
1932                 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
1933
1934                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1935                         /* legacy rlc firmware loading */
1936                         r = gfx_v11_0_rlc_load_microcode(adev);
1937                         if (r)
1938                                 return r;
1939                 }
1940
1941                 gfx_v11_0_init_csb(adev);
1942
1943                 adev->gfx.rlc.funcs->start(adev);
1944         }
1945         return 0;
1946 }
1947
1948 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
1949 {
1950         uint32_t usec_timeout = 50000;  /* wait for 50ms */
1951         uint32_t tmp;
1952         int i;
1953
1954         /* Trigger an invalidation of the L1 instruction caches */
1955         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
1956         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
1957         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
1958
1959         /* Wait for invalidation complete */
1960         for (i = 0; i < usec_timeout; i++) {
1961                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
1962                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
1963                                         INVALIDATE_CACHE_COMPLETE))
1964                         break;
1965                 udelay(1);
1966         }
1967
1968         if (i >= usec_timeout) {
1969                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
1970                 return -EINVAL;
1971         }
1972
1973         if (amdgpu_emu_mode == 1)
1974                 adev->hdp.funcs->flush_hdp(adev, NULL);
1975
1976         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
1977         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
1978         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
1979         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
1980         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
1981         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
1982
1983         /* Program me ucode address into instruction cache address register */
1984         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
1985                         lower_32_bits(addr) & 0xFFFFF000);
1986         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
1987                         upper_32_bits(addr));
1988
1989         return 0;
1990 }
1991
1992 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
1993 {
1994         uint32_t usec_timeout = 50000;  /* wait for 50ms */
1995         uint32_t tmp;
1996         int i;
1997
1998         /* Trigger an invalidation of the L1 instruction caches */
1999         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2000         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2001         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2002
2003         /* Wait for invalidation complete */
2004         for (i = 0; i < usec_timeout; i++) {
2005                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2006                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2007                                         INVALIDATE_CACHE_COMPLETE))
2008                         break;
2009                 udelay(1);
2010         }
2011
2012         if (i >= usec_timeout) {
2013                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2014                 return -EINVAL;
2015         }
2016
2017         if (amdgpu_emu_mode == 1)
2018                 adev->hdp.funcs->flush_hdp(adev, NULL);
2019
2020         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2021         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2022         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2023         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2024         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2025         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2026
2027         /* Program pfp ucode address into instruction cache address register */
2028         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2029                         lower_32_bits(addr) & 0xFFFFF000);
2030         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2031                         upper_32_bits(addr));
2032
2033         return 0;
2034 }
2035
2036 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2037 {
2038         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2039         uint32_t tmp;
2040         int i;
2041
2042         /* Trigger an invalidation of the L1 instruction caches */
2043         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2044         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2045
2046         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2047
2048         /* Wait for invalidation complete */
2049         for (i = 0; i < usec_timeout; i++) {
2050                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2051                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2052                                         INVALIDATE_CACHE_COMPLETE))
2053                         break;
2054                 udelay(1);
2055         }
2056
2057         if (i >= usec_timeout) {
2058                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2059                 return -EINVAL;
2060         }
2061
2062         if (amdgpu_emu_mode == 1)
2063                 adev->hdp.funcs->flush_hdp(adev, NULL);
2064
2065         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2066         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2067         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2068         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2069         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2070
2071         /* Program mec1 ucode address into instruction cache address register */
2072         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2073                         lower_32_bits(addr) & 0xFFFFF000);
2074         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2075                         upper_32_bits(addr));
2076
2077         return 0;
2078 }
2079
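     /*
      * RS64 PFP setup: program the instruction cache base, wait for the
      * implicit invalidation, prime the I$, program the per-pipe start PC and
      * data cache base, then invalidate the RS64 data cache.
      */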
2080 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2081 {
2082         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2083         uint32_t tmp;
2084         unsigned i, pipe_id;
2085         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2086
2087         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2088                 adev->gfx.pfp_fw->data;
2089
2090         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2091                 lower_32_bits(addr));
2092         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2093                 upper_32_bits(addr));
2094
2095         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2096         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2097         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2098         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2099         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2100
2101         /*
2102          * Programming any of the CP_PFP_IC_BASE registers
2103          * forces invalidation of the PFP L1 I$. Wait for the
2104          * invalidation to complete.
2105          */
2106         for (i = 0; i < usec_timeout; i++) {
2107                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2108                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2109                         INVALIDATE_CACHE_COMPLETE))
2110                         break;
2111                 udelay(1);
2112         }
2113
2114         if (i >= usec_timeout) {
2115                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2116                 return -EINVAL;
2117         }
2118
2119         /* Prime the L1 instruction caches */
2120         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2121         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2122         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2123         /* Wait for the cache to be primed */
2124         for (i = 0; i < usec_timeout; i++) {
2125                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2126                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2127                         ICACHE_PRIMED))
2128                         break;
2129                 udelay(1);
2130         }
2131
2132         if (i >= usec_timeout) {
2133                 dev_err(adev->dev, "failed to prime instruction cache\n");
2134                 return -EINVAL;
2135         }
2136
2137         mutex_lock(&adev->srbm_mutex);
2138         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2139                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2140                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2141                         (pfp_hdr->ucode_start_addr_hi << 30) |
2142                         (pfp_hdr->ucode_start_addr_lo >> 2));
2143                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2144                         pfp_hdr->ucode_start_addr_hi >> 2);
2145
2146                 /*
2147                  * Program CP_ME_CNTL to reset the given pipe so that
2148                  * CP_PFP_PRGRM_CNTR_START takes effect.
2149                  */
2150                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2151                 if (pipe_id == 0)
2152                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2153                                         PFP_PIPE0_RESET, 1);
2154                 else
2155                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2156                                         PFP_PIPE1_RESET, 1);
2157                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2158
2159                 /* Clear the pfp pipe reset bit. */
2160                 if (pipe_id == 0)
2161                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2162                                         PFP_PIPE0_RESET, 0);
2163                 else
2164                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2165                                         PFP_PIPE1_RESET, 0);
2166                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2167
2168                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2169                         lower_32_bits(addr2));
2170                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2171                         upper_32_bits(addr2));
2172         }
2173         soc21_grbm_select(adev, 0, 0, 0, 0);
2174         mutex_unlock(&adev->srbm_mutex);
2175
2176         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2177         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2178         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2179         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2180
2181         /* Invalidate the data caches */
2182         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2183         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2184         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2185
2186         for (i = 0; i < usec_timeout; i++) {
2187                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2188                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2189                         INVALIDATE_DCACHE_COMPLETE))
2190                         break;
2191                 udelay(1);
2192         }
2193
2194         if (i >= usec_timeout) {
2195                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2196                 return -EINVAL;
2197         }
2198
2199         return 0;
2200 }
2201
2202 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2203 {
2204         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2205         uint32_t tmp;
2206         unsigned i, pipe_id;
2207         const struct gfx_firmware_header_v2_0 *me_hdr;
2208
2209         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2210                 adev->gfx.me_fw->data;
2211
2212         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2213                 lower_32_bits(addr));
2214         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2215                 upper_32_bits(addr));
2216
2217         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2218         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2219         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2220         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2221         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2222
2223         /*
2224          * Programming any of the CP_ME_IC_BASE registers
2225          * forces invalidation of the ME L1 I$. Wait for the
2226          * invalidation to complete.
2227          */
2228         for (i = 0; i < usec_timeout; i++) {
2229                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2230                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2231                         INVALIDATE_CACHE_COMPLETE))
2232                         break;
2233                 udelay(1);
2234         }
2235
2236         if (i >= usec_timeout) {
2237                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2238                 return -EINVAL;
2239         }
2240
2241         /* Prime the instruction caches */
2242         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2243         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2244         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2245
2246         /* Wait for the instruction cache to be primed */
2247         for (i = 0; i < usec_timeout; i++) {
2248                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2249                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2250                         ICACHE_PRIMED))
2251                         break;
2252                 udelay(1);
2253         }
2254
2255         if (i >= usec_timeout) {
2256                 dev_err(adev->dev, "failed to prime instruction cache\n");
2257                 return -EINVAL;
2258         }
2259
2260         mutex_lock(&adev->srbm_mutex);
2261         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2262                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2263                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2264                         (me_hdr->ucode_start_addr_hi << 30) |
2265                         (me_hdr->ucode_start_addr_lo >> 2));
2266                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2267                         me_hdr->ucode_start_addr_hi >> 2);
2268
2269                 /*
2270                  * Program CP_ME_CNTL to reset the given pipe so that
2271                  * CP_ME_PRGRM_CNTR_START takes effect.
2272                  */
2273                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2274                 if (pipe_id == 0)
2275                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2276                                         ME_PIPE0_RESET, 1);
2277                 else
2278                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2279                                         ME_PIPE1_RESET, 1);
2280                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2281
2282                 /* Clear the me pipe reset bit. */
2283                 if (pipe_id == 0)
2284                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2285                                         ME_PIPE0_RESET, 0);
2286                 else
2287                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2288                                         ME_PIPE1_RESET, 0);
2289                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2290
2291                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2292                         lower_32_bits(addr2));
2293                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2294                         upper_32_bits(addr2));
2295         }
2296         soc21_grbm_select(adev, 0, 0, 0, 0);
2297         mutex_unlock(&adev->srbm_mutex);
2298
2299         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2300         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2301         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2302         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2303
2304         /* Invalidate the data caches */
2305         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2306         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2307         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2308
2309         for (i = 0; i < usec_timeout; i++) {
2310                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2311                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2312                         INVALIDATE_DCACHE_COMPLETE))
2313                         break;
2314                 udelay(1);
2315         }
2316
2317         if (i >= usec_timeout) {
2318                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2319                 return -EINVAL;
2320         }
2321
2322         return 0;
2323 }
2324
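     /*
      * RS64 MEC setup: program the instruction and data cache bases and the
      * per-pipe start PC, then invalidate both caches.
      */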
2325 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2326 {
2327         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2328         uint32_t tmp;
2329         unsigned i;
2330         const struct gfx_firmware_header_v2_0 *mec_hdr;
2331
2332         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2333                 adev->gfx.mec_fw->data;
2334
2335         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2336         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2337         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2338         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2339         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2340
2341         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2342         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2343         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2344         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2345
2346         mutex_lock(&adev->srbm_mutex);
2347         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2348                 soc21_grbm_select(adev, 1, i, 0, 0);
2349
2350                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, lower_32_bits(addr2));
2351                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2352                      upper_32_bits(addr2));
2353
2354                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2355                                         mec_hdr->ucode_start_addr_lo >> 2 |
2356                                         mec_hdr->ucode_start_addr_hi << 30);
2357                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2358                                         mec_hdr->ucode_start_addr_hi >> 2);
2359
2360                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, lower_32_bits(addr));
2361                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2362                      upper_32_bits(addr));
2363         }
2364         soc21_grbm_select(adev, 0, 0, 0, 0);
2365         mutex_unlock(&adev->srbm_mutex);
2366
2367         /* Trigger an invalidation of the MEC data cache */
2368         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2369         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2370         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2371
2372         /* Wait for invalidation complete */
2373         for (i = 0; i < usec_timeout; i++) {
2374                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2375                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2376                                        INVALIDATE_DCACHE_COMPLETE))
2377                         break;
2378                 udelay(1);
2379         }
2380
2381         if (i >= usec_timeout) {
2382                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2383                 return -EINVAL;
2384         }
2385
2386         /* Trigger an invalidation of the L1 instruction caches */
2387         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2388         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2389         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2390
2391         /* Wait for invalidation complete */
2392         for (i = 0; i < usec_timeout; i++) {
2393                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2394                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2395                                        INVALIDATE_CACHE_COMPLETE))
2396                         break;
2397                 udelay(1);
2398         }
2399
2400         if (i >= usec_timeout) {
2401                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2402                 return -EINVAL;
2403         }
2404
2405         return 0;
2406 }
2407
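     /*
      * Program the RS64 PFP/ME/MEC start program counters for every pipe and
      * pulse the corresponding pipe resets so the new values take effect.
      */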
2408 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2409 {
2410         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2411         const struct gfx_firmware_header_v2_0 *me_hdr;
2412         const struct gfx_firmware_header_v2_0 *mec_hdr;
2413         uint32_t pipe_id, tmp;
2414
2415         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2416                 adev->gfx.mec_fw->data;
2417         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2418                 adev->gfx.me_fw->data;
2419         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2420                 adev->gfx.pfp_fw->data;
2421
2422         /* config pfp program start addr */
2423         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2424                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2425                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2426                         (pfp_hdr->ucode_start_addr_hi << 30) |
2427                         (pfp_hdr->ucode_start_addr_lo >> 2));
2428                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2429                         pfp_hdr->ucode_start_addr_hi >> 2);
2430         }
2431         soc21_grbm_select(adev, 0, 0, 0, 0);
2432
2433         /* reset pfp pipe */
2434         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2435         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
2436         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
2437         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2438
2439         /* clear pfp pipe reset */
2440         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2441         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2442         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2443
2444         /* config me program start addr */
2445         for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2446                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2447                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2448                         (me_hdr->ucode_start_addr_hi << 30) |
2449                         (me_hdr->ucode_start_addr_lo >> 2));
2450                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2451                         me_hdr->ucode_start_addr_hi >> 2);
2452         }
2453         soc21_grbm_select(adev, 0, 0, 0, 0);
2454
2455         /* reset me pipe */
2456         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2457         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2458         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2459         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2460
2461         /* clear me pipe reset */
2462         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2463         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2464         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2465
2466         /* config mec program start addr */
2467         for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2468                 soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2469                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2470                                         mec_hdr->ucode_start_addr_lo >> 2 |
2471                                         mec_hdr->ucode_start_addr_hi << 30);
2472                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2473                                         mec_hdr->ucode_start_addr_hi >> 2);
2474         }
2475         soc21_grbm_select(adev, 0, 0, 0, 0);
2476
2477         /* reset mec pipe */
2478         tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2479         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
2480         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
2481         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
2482         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
2483         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2484
2485         /* clear mec pipe reset */
2486         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
2487         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
2488         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
2489         tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
2490         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2491 }
2492
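     /*
      * Poll CP_STAT and the RLC bootload status until the RLC has finished
      * autoloading the GC firmware; for backdoor autoload, then point the CP
      * ME/PFP/MEC caches at the staged images.
      */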
2493 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2494 {
2495         uint32_t cp_status;
2496         uint32_t bootload_status;
2497         int i, r;
2498         uint64_t addr, addr2;
2499
2500         for (i = 0; i < adev->usec_timeout; i++) {
2501                 cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2502
2503                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) ||
2504                                 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4))
2505                         bootload_status = RREG32_SOC15(GC, 0,
2506                                         regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
2507                 else
2508                         bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2509
2510                 if ((cp_status == 0) &&
2511                     (REG_GET_FIELD(bootload_status,
2512                         RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2513                         break;
2514                 }
2515                 udelay(1);
2516         }
2517
2518         if (i >= adev->usec_timeout) {
2519                 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2520                 return -ETIMEDOUT;
2521         }
2522
2523         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2524                 if (adev->gfx.rs64_enable) {
2525                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2526                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
2527                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2528                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
2529                         r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
2530                         if (r)
2531                                 return r;
2532                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2533                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
2534                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2535                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
2536                         r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
2537                         if (r)
2538                                 return r;
2539                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2540                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
2541                         addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2542                                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
2543                         r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
2544                         if (r)
2545                                 return r;
2546                 } else {
2547                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2548                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
2549                         r = gfx_v11_0_config_me_cache(adev, addr);
2550                         if (r)
2551                                 return r;
2552                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2553                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
2554                         r = gfx_v11_0_config_pfp_cache(adev, addr);
2555                         if (r)
2556                                 return r;
2557                         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2558                                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
2559                         r = gfx_v11_0_config_mec_cache(adev, addr);
2560                         if (r)
2561                                 return r;
2562                 }
2563         }
2564
2565         return 0;
2566 }
2567
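/* Halt or un-halt the PFP and ME, then wait for the CP to report idle. */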
2568 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2569 {
2570         int i;
2571         u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2572
2573         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2574         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2575         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2576
2577         for (i = 0; i < adev->usec_timeout; i++) {
2578                 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
2579                         break;
2580                 udelay(1);
2581         }
2582
2583         if (i >= adev->usec_timeout)
2584                 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2585
2586         return 0;
2587 }
2588
2589 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
2590 {
2591         int r;
2592         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2593         const __le32 *fw_data;
2594         unsigned i, fw_size;
2595
2596         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2597                 adev->gfx.pfp_fw->data;
2598
2599         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2600
2601         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2602                 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2603         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
2604
2605         r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
2606                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2607                                       &adev->gfx.pfp.pfp_fw_obj,
2608                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2609                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2610         if (r) {
2611                 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
2612                 gfx_v11_0_pfp_fini(adev);
2613                 return r;
2614         }
2615
2616         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
2617
2618         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2619         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2620
2621         gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
2622
2623         WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
2624
2625         for (i = 0; i < pfp_hdr->jt_size; i++)
2626                 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
2627                              le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
2628
2629         WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2630
2631         return 0;
2632 }
2633
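/*
 * RS64 PFP load: copy the instruction and data images into 64KB-aligned
 * VRAM BOs, point the PFP instruction cache and RS64 data-cache base at
 * them, invalidate/prime the caches and program the per-pipe
 * program-counter start address.
 */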
2634 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
2635 {
2636         int r;
2637         const struct gfx_firmware_header_v2_0 *pfp_hdr;
2638         const __le32 *fw_ucode, *fw_data;
2639         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2640         uint32_t tmp;
2641         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2642
2643         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2644                 adev->gfx.pfp_fw->data;
2645
2646         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2647
2648         /* instruction */
2649         fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
2650                 le32_to_cpu(pfp_hdr->ucode_offset_bytes));
2651         fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
2652         /* data */
2653         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2654                 le32_to_cpu(pfp_hdr->data_offset_bytes));
2655         fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
2656
2657         /* 64KB alignment */
2658         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2659                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2660                                       &adev->gfx.pfp.pfp_fw_obj,
2661                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2662                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2663         if (r) {
2664                 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2665                 gfx_v11_0_pfp_fini(adev);
2666                 return r;
2667         }
2668
2669         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2670                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2671                                       &adev->gfx.pfp.pfp_fw_data_obj,
2672                                       &adev->gfx.pfp.pfp_fw_data_gpu_addr,
2673                                       (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2674         if (r) {
2675                 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2676                 gfx_v11_0_pfp_fini(adev);
2677                 return r;
2678         }
2679
2680         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
2681         memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
2682
2683         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2684         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
2685         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2686         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2687
2688         if (amdgpu_emu_mode == 1)
2689                 adev->hdp.funcs->flush_hdp(adev, NULL);
2690
2691         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2692                 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2693         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2694                 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2695
2696         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2697         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2698         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2699         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2700         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2701
2702         /*
2703          * Programming any of the CP_PFP_IC_BASE registers
2704          * forces invalidation of the PFP L1 I$. Wait for the
2705          * invalidation to complete.
2706          */
2707         for (i = 0; i < usec_timeout; i++) {
2708                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2709                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2710                         INVALIDATE_CACHE_COMPLETE))
2711                         break;
2712                 udelay(1);
2713         }
2714
2715         if (i >= usec_timeout) {
2716                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2717                 return -EINVAL;
2718         }
2719
2720         /* Prime the L1 instruction caches */
2721         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2722         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2723         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2724         /* Wait for the instruction cache to be primed */
2725         for (i = 0; i < usec_timeout; i++) {
2726                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2727                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2728                         ICACHE_PRIMED))
2729                         break;
2730                 udelay(1);
2731         }
2732
2733         if (i >= usec_timeout) {
2734                 dev_err(adev->dev, "failed to prime instruction cache\n");
2735                 return -EINVAL;
2736         }
2737
2738         mutex_lock(&adev->srbm_mutex);
2739         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2740                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2741                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2742                         (pfp_hdr->ucode_start_addr_hi << 30) |
2743                         (pfp_hdr->ucode_start_addr_lo >> 2) );
2744                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2745                         pfp_hdr->ucode_start_addr_hi>>2);
2746
2747                 /*
2748                  * Program CP_ME_CNTL to reset the given pipe so that
2749                  * CP_PFP_PRGRM_CNTR_START takes effect.
2750                  */
2751                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2752                 if (pipe_id == 0)
2753                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2754                                         PFP_PIPE0_RESET, 1);
2755                 else
2756                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2757                                         PFP_PIPE1_RESET, 1);
2758                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2759
2760                 /* Clear the pfp pipe reset bit. */
2761                 if (pipe_id == 0)
2762                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2763                                         PFP_PIPE0_RESET, 0);
2764                 else
2765                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2766                                         PFP_PIPE1_RESET, 0);
2767                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2768
2769                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2770                         lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2771                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2772                         upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2773         }
2774         soc21_grbm_select(adev, 0, 0, 0, 0);
2775         mutex_unlock(&adev->srbm_mutex);
2776
2777         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2778         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2779         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2780         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2781
2782         /* Invalidate the data caches */
2783         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2784         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2785         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2786
2787         for (i = 0; i < usec_timeout; i++) {
2788                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2789                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2790                         INVALIDATE_DCACHE_COMPLETE))
2791                         break;
2792                 udelay(1);
2793         }
2794
2795         if (i >= usec_timeout) {
2796                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2797                 return -EINVAL;
2798         }
2799
2800         return 0;
2801 }
2802
2803 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
2804 {
2805         int r;
2806         const struct gfx_firmware_header_v1_0 *me_hdr;
2807         const __le32 *fw_data;
2808         unsigned i, fw_size;
2809
2810         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2811                 adev->gfx.me_fw->data;
2812
2813         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2814
2815         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2816                 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2817         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
2818
2819         r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
2820                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2821                                       &adev->gfx.me.me_fw_obj,
2822                                       &adev->gfx.me.me_fw_gpu_addr,
2823                                       (void **)&adev->gfx.me.me_fw_ptr);
2824         if (r) {
2825                 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
2826                 gfx_v11_0_me_fini(adev);
2827                 return r;
2828         }
2829
2830         memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
2831
2832         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2833         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2834
2835         gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
2836
2837         WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
2838
2839         for (i = 0; i < me_hdr->jt_size; i++)
2840                 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
2841                              le32_to_cpup(fw_data + me_hdr->jt_offset + i));
2842
2843         WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
2844
2845         return 0;
2846 }
2847
2848 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
2849 {
2850         int r;
2851         const struct gfx_firmware_header_v2_0 *me_hdr;
2852         const __le32 *fw_ucode, *fw_data;
2853         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2854         uint32_t tmp;
2855         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2856
2857         me_hdr = (const struct gfx_firmware_header_v2_0 *)
2858                 adev->gfx.me_fw->data;
2859
2860         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2861
2862         /* instruction */
2863         fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
2864                 le32_to_cpu(me_hdr->ucode_offset_bytes));
2865         fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
2866         /* data */
2867         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2868                 le32_to_cpu(me_hdr->data_offset_bytes));
2869         fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
2870
2871         /* 64KB alignment */
2872         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2873                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2874                                       &adev->gfx.me.me_fw_obj,
2875                                       &adev->gfx.me.me_fw_gpu_addr,
2876                                       (void **)&adev->gfx.me.me_fw_ptr);
2877         if (r) {
2878                 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
2879                 gfx_v11_0_me_fini(adev);
2880                 return r;
2881         }
2882
2883         r = amdgpu_bo_create_reserved(adev, fw_data_size,
2884                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2885                                       &adev->gfx.me.me_fw_data_obj,
2886                                       &adev->gfx.me.me_fw_data_gpu_addr,
2887                                       (void **)&adev->gfx.me.me_fw_data_ptr);
2888         if (r) {
2889                 dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
2890                 gfx_v11_0_me_fini(adev);
2891                 return r;
2892         }
2893
2894         memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
2895         memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
2896
2897         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2898         amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
2899         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2900         amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
2901
2902         if (amdgpu_emu_mode == 1)
2903                 adev->hdp.funcs->flush_hdp(adev, NULL);
2904
2905         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2906                 lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
2907         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2908                 upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
2909
2910         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2911         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2912         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2913         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2914         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2915
2916         /*
2917          * Programming any of the CP_ME_IC_BASE registers
2918          * forces invalidation of the ME L1 I$. Wait for the
2919          * invalidation to complete.
2920          */
2921         for (i = 0; i < usec_timeout; i++) {
2922                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2923                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2924                         INVALIDATE_CACHE_COMPLETE))
2925                         break;
2926                 udelay(1);
2927         }
2928
2929         if (i >= usec_timeout) {
2930                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2931                 return -EINVAL;
2932         }
2933
2934         /* Prime the instruction caches */
2935         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2936         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2937         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2938
2939         /* Wait for the instruction cache to be primed */
2940         for (i = 0; i < usec_timeout; i++) {
2941                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2942                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2943                         ICACHE_PRIMED))
2944                         break;
2945                 udelay(1);
2946         }
2947
2948         if (i >= usec_timeout) {
2949                 dev_err(adev->dev, "failed to prime instruction cache\n");
2950                 return -EINVAL;
2951         }
2952
2953         mutex_lock(&adev->srbm_mutex);
2954         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2955                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2956                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2957                         (me_hdr->ucode_start_addr_hi << 30) |
2958                         (me_hdr->ucode_start_addr_lo >> 2) );
2959                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2960                         me_hdr->ucode_start_addr_hi>>2);
2961
2962                 /*
2963                  * Program CP_ME_CNTL to reset the given pipe so that
2964                  * CP_ME_PRGRM_CNTR_START takes effect.
2965                  */
2966                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2967                 if (pipe_id == 0)
2968                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2969                                         ME_PIPE0_RESET, 1);
2970                 else
2971                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2972                                         ME_PIPE1_RESET, 1);
2973                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2974
2975                 /* Clear the me pipe reset bit. */
2976                 if (pipe_id == 0)
2977                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2978                                         ME_PIPE0_RESET, 0);
2979                 else
2980                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2981                                         ME_PIPE1_RESET, 0);
2982                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2983
2984                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2985                         lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2986                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2987                         upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
2988         }
2989         soc21_grbm_select(adev, 0, 0, 0, 0);
2990         mutex_unlock(&adev->srbm_mutex);
2991
2992         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2993         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2994         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2995         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2996
2997         /* Invalidate the data caches */
2998         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2999         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3000         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3001
3002         for (i = 0; i < usec_timeout; i++) {
3003                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3004                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3005                         INVALIDATE_DCACHE_COMPLETE))
3006                         break;
3007                 udelay(1);
3008         }
3009
3010         if (i >= usec_timeout) {
3011                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3012                 return -EINVAL;
3013         }
3014
3015         return 0;
3016 }
3017
3018 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3019 {
3020         int r;
3021
3022         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3023                 return -EINVAL;
3024
3025         gfx_v11_0_cp_gfx_enable(adev, false);
3026
3027         if (adev->gfx.rs64_enable)
3028                 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3029         else
3030                 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3031         if (r) {
3032                 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3033                 return r;
3034         }
3035
3036         if (adev->gfx.rs64_enable)
3037                 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3038         else
3039                 r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3040         if (r) {
3041                 dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3042                 return r;
3043         }
3044
3045         return 0;
3046 }
3047
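/*
 * Initialize the CP context: emit the clear-state (golden context register)
 * sequence on gfx ring 0 and, if a second gfx ring exists, a CLEAR_STATE
 * packet on it as well.
 */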
3048 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3049 {
3050         struct amdgpu_ring *ring;
3051         const struct cs_section_def *sect = NULL;
3052         const struct cs_extent_def *ext = NULL;
3053         int r, i;
3054         int ctx_reg_offset;
3055
3056         /* init the CP */
3057         WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3058                      adev->gfx.config.max_hw_contexts - 1);
3059         WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3060
3061         if (!amdgpu_async_gfx_ring)
3062                 gfx_v11_0_cp_gfx_enable(adev, true);
3063
3064         ring = &adev->gfx.gfx_ring[0];
3065         r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3066         if (r) {
3067                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3068                 return r;
3069         }
3070
3071         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3072         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3073
3074         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3075         amdgpu_ring_write(ring, 0x80000000);
3076         amdgpu_ring_write(ring, 0x80000000);
3077
3078         for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3079                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3080                         if (sect->id == SECT_CONTEXT) {
3081                                 amdgpu_ring_write(ring,
3082                                                   PACKET3(PACKET3_SET_CONTEXT_REG,
3083                                                           ext->reg_count));
3084                                 amdgpu_ring_write(ring, ext->reg_index -
3085                                                   PACKET3_SET_CONTEXT_REG_START);
3086                                 for (i = 0; i < ext->reg_count; i++)
3087                                         amdgpu_ring_write(ring, ext->extent[i]);
3088                         }
3089                 }
3090         }
3091
3092         ctx_reg_offset =
3093                 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3094         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3095         amdgpu_ring_write(ring, ctx_reg_offset);
3096         amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3097
3098         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3099         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3100
3101         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3102         amdgpu_ring_write(ring, 0);
3103
3104         amdgpu_ring_commit(ring);
3105
3106         /* submit cs packet to copy state 0 to next available state */
3107         if (adev->gfx.num_gfx_rings > 1) {
3108                 /* at most 2 gfx rings are supported */
3109                 ring = &adev->gfx.gfx_ring[1];
3110                 r = amdgpu_ring_alloc(ring, 2);
3111                 if (r) {
3112                         DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3113                         return r;
3114                 }
3115
3116                 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3117                 amdgpu_ring_write(ring, 0);
3118
3119                 amdgpu_ring_commit(ring);
3120         }
3121         return 0;
3122 }
3123
3124 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3125                                          CP_PIPE_ID pipe)
3126 {
3127         u32 tmp;
3128
3129         tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3130         tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3131
3132         WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3133 }
3134
3135 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3136                                           struct amdgpu_ring *ring)
3137 {
3138         u32 tmp;
3139
3140         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3141         if (ring->use_doorbell) {
3142                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3143                                     DOORBELL_OFFSET, ring->doorbell_index);
3144                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3145                                     DOORBELL_EN, 1);
3146         } else {
3147                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3148                                     DOORBELL_EN, 0);
3149         }
3150         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3151
3152         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3153                             DOORBELL_RANGE_LOWER, ring->doorbell_index);
3154         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3155
3156         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3157                      CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3158 }
3159
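/*
 * Program the ring-buffer registers (size, base, rptr/wptr addresses,
 * doorbell) for up to two gfx rings, then start the CP.
 */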
3160 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3161 {
3162         struct amdgpu_ring *ring;
3163         u32 tmp;
3164         u32 rb_bufsz;
3165         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3166         u32 i;
3167
3168         /* Set the write pointer delay */
3169         WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3170
3171         /* set the RB to use vmid 0 */
3172         WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3173
3174         /* Init gfx ring 0 for pipe 0 */
3175         mutex_lock(&adev->srbm_mutex);
3176         gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3177
3178         /* Set ring buffer size */
3179         ring = &adev->gfx.gfx_ring[0];
3180         rb_bufsz = order_base_2(ring->ring_size / 8);
3181         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3182         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3183         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3184
3185         /* Initialize the ring buffer's write pointers */
3186         ring->wptr = 0;
3187         WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3188         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3189
3190         /* set the wb address whether it's enabled or not */
3191         rptr_addr = ring->rptr_gpu_addr;
3192         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3193         WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3194                      CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3195
3196         wptr_gpu_addr = ring->wptr_gpu_addr;
3197         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3198                      lower_32_bits(wptr_gpu_addr));
3199         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3200                      upper_32_bits(wptr_gpu_addr));
3201
3202         mdelay(1);
3203         WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3204
3205         rb_addr = ring->gpu_addr >> 8;
3206         WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3207         WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3208
3209         WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3210
3211         gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3212         mutex_unlock(&adev->srbm_mutex);
3213
3214         /* Init gfx ring 1 for pipe 1 */
3215         if (adev->gfx.num_gfx_rings > 1) {
3216                 mutex_lock(&adev->srbm_mutex);
3217                 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3218                 /* at most 2 gfx rings are supported */
3219                 ring = &adev->gfx.gfx_ring[1];
3220                 rb_bufsz = order_base_2(ring->ring_size / 8);
3221                 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3222                 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3223                 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3224                 /* Initialize the ring buffer's write pointers */
3225                 ring->wptr = 0;
3226                 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3227                 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3228                 /* Set the wb address whether it's enabled or not */
3229                 rptr_addr = ring->rptr_gpu_addr;
3230                 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3231                 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3232                              CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3233                 wptr_gpu_addr = ring->wptr_gpu_addr;
3234                 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3235                              lower_32_bits(wptr_gpu_addr));
3236                 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3237                              upper_32_bits(wptr_gpu_addr));
3238
3239                 mdelay(1);
3240                 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3241
3242                 rb_addr = ring->gpu_addr >> 8;
3243                 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3244                 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3245                 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3246
3247                 gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3248                 mutex_unlock(&adev->srbm_mutex);
3249         }
3250         /* Switch to pipe 0 */
3251         mutex_lock(&adev->srbm_mutex);
3252         gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3253         mutex_unlock(&adev->srbm_mutex);
3254
3255         /* start the ring */
3256         gfx_v11_0_cp_gfx_start(adev);
3257
3258         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3259                 ring = &adev->gfx.gfx_ring[i];
3260                 ring->sched.ready = true;
3261         }
3262
3263         return 0;
3264 }
3265
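/*
 * Enable or halt the compute micro engine: in RS64 mode via the per-pipe
 * reset/active and halt bits in CP_MEC_RS64_CNTL, otherwise via the
 * MEC_ME1/ME2 halt bits in CP_MEC_CNTL.
 */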
3266 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3267 {
3268         u32 data;
3269
3270         if (adev->gfx.rs64_enable) {
3271                 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3272                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3273                                                          enable ? 0 : 1);
3274                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3275                                                          enable ? 0 : 1);
3276                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3277                                                          enable ? 0 : 1);
3278                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3279                                                          enable ? 0 : 1);
3280                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3281                                                          enable ? 0 : 1);
3282                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3283                                                          enable ? 1 : 0);
3284                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3285                                                          enable ? 1 : 0);
3286                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3287                                                          enable ? 1 : 0);
3288                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3289                                                          enable ? 1 : 0);
3290                 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3291                                                          enable ? 0 : 1);
3292                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3293         } else {
3294                 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3295
3296                 if (enable) {
3297                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3298                         if (!adev->enable_mes_kiq)
3299                                 data = REG_SET_FIELD(data, CP_MEC_CNTL,
3300                                                      MEC_ME2_HALT, 0);
3301                 } else {
3302                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3303                         data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3304                 }
3305                 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3306         }
3307
3308         adev->gfx.kiq.ring.sched.ready = enable;
3309
3310         udelay(50);
3311 }
3312
3313 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3314 {
3315         const struct gfx_firmware_header_v1_0 *mec_hdr;
3316         const __le32 *fw_data;
3317         unsigned i, fw_size;
3318         u32 *fw = NULL;
3319         int r;
3320
3321         if (!adev->gfx.mec_fw)
3322                 return -EINVAL;
3323
3324         gfx_v11_0_cp_compute_enable(adev, false);
3325
3326         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3327         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3328
3329         fw_data = (const __le32 *)
3330                 (adev->gfx.mec_fw->data +
3331                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3332         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3333
3334         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3335                                           PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3336                                           &adev->gfx.mec.mec_fw_obj,
3337                                           &adev->gfx.mec.mec_fw_gpu_addr,
3338                                           (void **)&fw);
3339         if (r) {
3340                 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3341                 gfx_v11_0_mec_fini(adev);
3342                 return r;
3343         }
3344
3345         memcpy(fw, fw_data, fw_size);
3346
3347         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3348         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3349
3350         gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3351
3352         /* MEC1 */
3353         WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3354
3355         for (i = 0; i < mec_hdr->jt_size; i++)
3356                 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3357                              le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3358
3359         WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3360
3361         return 0;
3362 }
3363
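/*
 * RS64 MEC load: copy the instruction and data images into 64KB-aligned
 * VRAM BOs, program the per-pipe cache bases and program-counter start
 * address, then invalidate the MEC data and instruction caches.
 */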
3364 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3365 {
3366         const struct gfx_firmware_header_v2_0 *mec_hdr;
3367         const __le32 *fw_ucode, *fw_data;
3368         u32 tmp, fw_ucode_size, fw_data_size;
3369         u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3370         u32 *fw_ucode_ptr, *fw_data_ptr;
3371         int r;
3372
3373         if (!adev->gfx.mec_fw)
3374                 return -EINVAL;
3375
3376         gfx_v11_0_cp_compute_enable(adev, false);
3377
3378         mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3379         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3380
3381         fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3382                                 le32_to_cpu(mec_hdr->ucode_offset_bytes));
3383         fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3384
3385         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3386                                 le32_to_cpu(mec_hdr->data_offset_bytes));
3387         fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3388
3389         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3390                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3391                                       &adev->gfx.mec.mec_fw_obj,
3392                                       &adev->gfx.mec.mec_fw_gpu_addr,
3393                                       (void **)&fw_ucode_ptr);
3394         if (r) {
3395                 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3396                 gfx_v11_0_mec_fini(adev);
3397                 return r;
3398         }
3399
3400         r = amdgpu_bo_create_reserved(adev, fw_data_size,
3401                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3402                                       &adev->gfx.mec.mec_fw_data_obj,
3403                                       &adev->gfx.mec.mec_fw_data_gpu_addr,
3404                                       (void **)&fw_data_ptr);
3405         if (r) {
3406                 dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3407                 gfx_v11_0_mec_fini(adev);
3408                 return r;
3409         }
3410
3411         memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3412         memcpy(fw_data_ptr, fw_data, fw_data_size);
3413
3414         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3415         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3416         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3417         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3418
3419         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3420         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3421         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3422         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3423         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
3424
3425         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
3426         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
3427         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
3428         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
3429
3430         mutex_lock(&adev->srbm_mutex);
3431         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
3432                 soc21_grbm_select(adev, 1, i, 0, 0);
3433
3434                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
3435                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
3436                      upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3437
3438                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3439                                         mec_hdr->ucode_start_addr_lo >> 2 |
3440                                         mec_hdr->ucode_start_addr_hi << 30);
3441                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3442                                         mec_hdr->ucode_start_addr_hi >> 2);
3443
3444                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
3445                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
3446                      upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3447         }
3448         mutex_unlock(&adev->srbm_mutex);
3449         soc21_grbm_select(adev, 0, 0, 0, 0);
3450
3451         /* Trigger an invalidation of the MEC L1 data cache */
3452         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3453         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3454         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
3455
3456         /* Wait for invalidation complete */
3457         for (i = 0; i < usec_timeout; i++) {
3458                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3459                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
3460                                        INVALIDATE_DCACHE_COMPLETE))
3461                         break;
3462                 udelay(1);
3463         }
3464
3465         if (i >= usec_timeout) {
3466                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3467                 return -EINVAL;
3468         }
3469
3470         /* Trigger an invalidation of the L1 instruction caches */
3471         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3472         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
3473         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
3474
3475         /* Wait for invalidation complete */
3476         for (i = 0; i < usec_timeout; i++) {
3477                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3478                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
3479                                        INVALIDATE_CACHE_COMPLETE))
3480                         break;
3481                 udelay(1);
3482         }
3483
3484         if (i >= usec_timeout) {
3485                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
3486                 return -EINVAL;
3487         }
3488
3489         return 0;
3490 }
3491
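/*
 * Register the KIQ with the RLC: the low byte of RLC_CP_SCHEDULERS encodes
 * the queue as (me << 5) | (pipe << 3) | queue, and bit 7 is then set in a
 * second write (presumably marking the scheduler entry valid).
 */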
3492 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
3493 {
3494         uint32_t tmp;
3495         struct amdgpu_device *adev = ring->adev;
3496
3497         /* tell the RLC which queue is the KIQ */
3498         tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3499         tmp &= 0xffffff00;
3500         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3501         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3502         tmp |= 0x80;
3503         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3504 }
3505
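/*
 * Program the doorbell apertures for the gfx and compute engines. The
 * doorbell indices appear to be in 64-bit doorbell slots, hence the
 * conversion to a byte offset (index * 2 dwords, << 2 bytes).
 */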
3506 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
3507 {
3508         /* set graphics engine doorbell range */
3509         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
3510                      (adev->doorbell_index.gfx_ring0 * 2) << 2);
3511         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3512                      (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
3513
3514         /* set compute engine doorbell range */
3515         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3516                      (adev->doorbell_index.kiq * 2) << 2);
3517         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3518                      (adev->doorbell_index.userqueue_end * 2) << 2);
3519 }
3520
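/*
 * Fill the gfx MQD (memory queue descriptor) with the HQD register state
 * loaded when the queue is mapped: MQD base, vmid, priority, ring-buffer
 * base/size, rptr/wptr poll addresses and doorbell control.
 */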
3521 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
3522                                   struct amdgpu_mqd_prop *prop)
3523 {
3524         struct v11_gfx_mqd *mqd = m;
3525         uint64_t hqd_gpu_addr, wb_gpu_addr;
3526         uint32_t tmp;
3527         uint32_t rb_bufsz;
3528
3529         /* set up gfx hqd wptr */
3530         mqd->cp_gfx_hqd_wptr = 0;
3531         mqd->cp_gfx_hqd_wptr_hi = 0;
3532
3533         /* set the pointer to the MQD */
3534         mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
3535         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3536
3537         /* set up mqd control */
3538         tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
3539         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
3540         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
3541         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
3542         mqd->cp_gfx_mqd_control = tmp;
3543
3544         /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
3545         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
3546         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
3547         mqd->cp_gfx_hqd_vmid = 0;
3548
3549         /* set up default queue priority level
3550          * 0x0 = low priority, 0x1 = high priority */
3551         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
3552         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
3553         mqd->cp_gfx_hqd_queue_priority = tmp;
3554
3555         /* set up time quantum */
3556         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
3557         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
3558         mqd->cp_gfx_hqd_quantum = tmp;
3559
3560         /* set up gfx hqd base. this is similar to CP_RB_BASE */
3561         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3562         mqd->cp_gfx_hqd_base = hqd_gpu_addr;
3563         mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
3564
3565         /* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
3566         wb_gpu_addr = prop->rptr_gpu_addr;
3567         mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
3568         mqd->cp_gfx_hqd_rptr_addr_hi =
3569                 upper_32_bits(wb_gpu_addr) & 0xffff;
3570
3571         /* set up rb_wptr_poll addr */
3572         wb_gpu_addr = prop->wptr_gpu_addr;
3573         mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3574         mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3575
3576         /* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
3577         rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
3578         tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
3579         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
3580         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
3581 #ifdef __BIG_ENDIAN
3582         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
3583 #endif
3584         mqd->cp_gfx_hqd_cntl = tmp;
3585
3586         /* set up cp_doorbell_control */
3587         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3588         if (prop->use_doorbell) {
3589                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3590                                     DOORBELL_OFFSET, prop->doorbell_index);
3591                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3592                                     DOORBELL_EN, 1);
3593         } else
3594                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3595                                     DOORBELL_EN, 0);
3596         mqd->cp_rb_doorbell_control = tmp;
3597
3598         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3599         mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
3600
3601         /* activate the queue */
3602         mqd->cp_gfx_hqd_active = 1;
3603
3604         return 0;
3605 }
3606
3607 #ifdef BRING_UP_DEBUG
3608 static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring)
3609 {
3610         struct amdgpu_device *adev = ring->adev;
3611         struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3612
3613         /* set mmCP_GFX_HQD_WPTR/_HI to 0 */
3614         WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
3615         WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
3616
3617         /* set GFX_MQD_BASE */
3618         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
3619         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3620
3621         /* set GFX_MQD_CONTROL */
3622         WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
3623
3624         /* set GFX_HQD_VMID to 0 */
3625         WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
3626
3627         WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY,
3628                         mqd->cp_gfx_hqd_queue_priority);
3629         WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
3630
3631         /* set GFX_HQD_BASE, similar to CP_RB_BASE */
3632         WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
3633         WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
3634
3635         /* set GFX_HQD_RPTR_ADDR, similar to CP_RB_RPTR */
3636         WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
3637         WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
3638
3639         /* set GFX_HQD_CNTL, similar to CP_RB_CNTL */
3640         WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
3641
3642         /* set RB_WPTR_POLL_ADDR */
3643         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
3644         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
3645
3646         /* set RB_DOORBELL_CONTROL */
3647         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
3648
3649         /* activate the queue */
3650         WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
3651
3652         return 0;
3653 }
3654 #endif
3655
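/*
 * Initialize the gfx ring MQD under srbm_mutex, or restore it from the
 * backup copy when re-initializing during a GPU reset.
 */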
3656 static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
3657 {
3658         struct amdgpu_device *adev = ring->adev;
3659         struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3660         int mqd_idx = ring - &adev->gfx.gfx_ring[0];
3661
3662         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3663                 memset((void *)mqd, 0, sizeof(*mqd));
3664                 mutex_lock(&adev->srbm_mutex);
3665                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3666                 amdgpu_ring_init_mqd(ring);
3667 #ifdef BRING_UP_DEBUG
3668                 gfx_v11_0_gfx_queue_init_register(ring);
3669 #endif
3670                 soc21_grbm_select(adev, 0, 0, 0, 0);
3671                 mutex_unlock(&adev->srbm_mutex);
3672                 if (adev->gfx.me.mqd_backup[mqd_idx])
3673                         memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3674         } else if (amdgpu_in_reset(adev)) {
3675                 /* reset mqd with the backup copy */
3676                 if (adev->gfx.me.mqd_backup[mqd_idx])
3677                         memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
3678                 /* reset the ring */
3679                 ring->wptr = 0;
3680                 *ring->wptr_cpu_addr = 0;
3681                 amdgpu_ring_clear_ring(ring);
3682 #ifdef BRING_UP_DEBUG
3683                 mutex_lock(&adev->srbm_mutex);
3684                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3685                 gfx_v11_0_gfx_queue_init_register(ring);
3686                 soc21_grbm_select(adev, 0, 0, 0, 0);
3687                 mutex_unlock(&adev->srbm_mutex);
3688 #endif
3689         } else {
3690                 amdgpu_ring_clear_ring(ring);
3691         }
3692
3693         return 0;
3694 }
3695
3696 #ifndef BRING_UP_DEBUG
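/*
 * Ask the KIQ to map the kernel gfx queues: one MAP_QUEUES packet per gfx
 * ring, followed by a KIQ ring test to make sure the packets were consumed.
 */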
3697 static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev)
3698 {
3699         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3700         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3701         int r, i;
3702
3703         if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
3704                 return -EINVAL;
3705
3706         r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
3707                                         adev->gfx.num_gfx_rings);
3708         if (r) {
3709                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3710                 return r;
3711         }
3712
3713         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3714                 kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
3715
3716         return amdgpu_ring_test_helper(kiq_ring);
3717 }
3718 #endif
3719
3720 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
3721 {
3722         int r, i;
3723         struct amdgpu_ring *ring;
3724
3725         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3726                 ring = &adev->gfx.gfx_ring[i];
3727
3728                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3729                 if (unlikely(r != 0))
3730                         goto done;
3731
3732                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3733                 if (!r) {
3734                         r = gfx_v11_0_gfx_init_queue(ring);
3735                         amdgpu_bo_kunmap(ring->mqd_obj);
3736                         ring->mqd_ptr = NULL;
3737                 }
3738                 amdgpu_bo_unreserve(ring->mqd_obj);
3739                 if (r)
3740                         goto done;
3741         }
3742 #ifndef BRING_UP_DEBUG
3743         r = gfx_v11_0_kiq_enable_kgq(adev);
3744         if (r)
3745                 goto done;
3746 #endif
3747         r = gfx_v11_0_cp_gfx_start(adev);
3748         if (r)
3749                 goto done;
3750
3751         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3752                 ring = &adev->gfx.gfx_ring[i];
3753                 ring->sched.ready = true;
3754         }
3755 done:
3756         return r;
3757 }
3758
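/*
 * Fill the compute MQD: EOP buffer, doorbell, MQD/HQD base addresses and
 * queue sizing, mirroring the register setup the CP performs when the
 * queue is mapped.
 */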
3759 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
3760                                       struct amdgpu_mqd_prop *prop)
3761 {
3762         struct v11_compute_mqd *mqd = m;
3763         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3764         uint32_t tmp;
3765
3766         mqd->header = 0xC0310800;
3767         mqd->compute_pipelinestat_enable = 0x00000001;
3768         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3769         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3770         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3771         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3772         mqd->compute_misc_reserved = 0x00000007;
3773
3774         eop_base_addr = prop->eop_gpu_addr >> 8;
3775         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3776         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3777
3778         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3779         tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
3780         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3781                         (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
3782
3783         mqd->cp_hqd_eop_control = tmp;
3784
3785         /* enable doorbell? */
3786         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3787
3788         if (prop->use_doorbell) {
3789                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3790                                     DOORBELL_OFFSET, prop->doorbell_index);
3791                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3792                                     DOORBELL_EN, 1);
3793                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3794                                     DOORBELL_SOURCE, 0);
3795                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3796                                     DOORBELL_HIT, 0);
3797         } else {
3798                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3799                                     DOORBELL_EN, 0);
3800         }
3801
3802         mqd->cp_hqd_pq_doorbell_control = tmp;
3803
3804         /* disable the queue if it's active */
3805         mqd->cp_hqd_dequeue_request = 0;
3806         mqd->cp_hqd_pq_rptr = 0;
3807         mqd->cp_hqd_pq_wptr_lo = 0;
3808         mqd->cp_hqd_pq_wptr_hi = 0;
3809
3810         /* set the pointer to the MQD */
3811         mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
3812         mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3813
3814         /* set MQD vmid to 0 */
3815         tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
3816         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3817         mqd->cp_mqd_control = tmp;
3818
3819         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3820         hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3821         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3822         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3823
3824         /* set up the HQD, this is similar to CP_RB0_CNTL */
3825         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
3826         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3827                             (order_base_2(prop->queue_size / 4) - 1));
3828         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3829                             (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3830         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3831         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
3832         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3833         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3834         mqd->cp_hqd_pq_control = tmp;
3835
3836         /* set the wb address whether it's enabled or not */
3837         wb_gpu_addr = prop->rptr_gpu_addr;
3838         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3839         mqd->cp_hqd_pq_rptr_report_addr_hi =
3840                 upper_32_bits(wb_gpu_addr) & 0xffff;
3841
3842         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3843         wb_gpu_addr = prop->wptr_gpu_addr;
3844         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3845         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3846
3847         tmp = 0;
3848         /* enable the doorbell if requested */
3849         if (prop->use_doorbell) {
3850                 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3851                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3852                                 DOORBELL_OFFSET, prop->doorbell_index);
3853
3854                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3855                                     DOORBELL_EN, 1);
3856                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3857                                     DOORBELL_SOURCE, 0);
3858                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3859                                     DOORBELL_HIT, 0);
3860         }
3861
3862         mqd->cp_hqd_pq_doorbell_control = tmp;
3863
3864         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3865         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
3866
3867         /* set the vmid for the queue */
3868         mqd->cp_hqd_vmid = 0;
3869
3870         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
3871         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
3872         mqd->cp_hqd_persistent_state = tmp;
3873
3874         /* set MIN_IB_AVAIL_SIZE */
3875         tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
3876         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3877         mqd->cp_hqd_ib_control = tmp;
3878
3879         /* set static priority for a compute queue/ring */
3880         mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
3881         mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
3882
3883         mqd->cp_hqd_active = prop->hqd_active;
3884
3885         return 0;
3886 }
3887
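     /*
      * Program the KIQ's HQD registers directly from its MQD; callers select
      * the target me/pipe/queue via GRBM while holding srbm_mutex.
      */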
3888 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
3889 {
3890         struct amdgpu_device *adev = ring->adev;
3891         struct v11_compute_mqd *mqd = ring->mqd_ptr;
3892         int j;
3893
3894         /* inactivate the queue */
3895         if (amdgpu_sriov_vf(adev))
3896                 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
3897
3898         /* disable wptr polling */
3899         WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3900
3901         /* write the EOP addr */
3902         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
3903                mqd->cp_hqd_eop_base_addr_lo);
3904         WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
3905                mqd->cp_hqd_eop_base_addr_hi);
3906
3907         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3908         WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
3909                mqd->cp_hqd_eop_control);
3910
3911         /* enable doorbell? */
3912         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3913                mqd->cp_hqd_pq_doorbell_control);
3914
3915         /* disable the queue if it's active */
3916         if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
3917                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
3918                 for (j = 0; j < adev->usec_timeout; j++) {
3919                         if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
3920                                 break;
3921                         udelay(1);
3922                 }
3923                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
3924                        mqd->cp_hqd_dequeue_request);
3925                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
3926                        mqd->cp_hqd_pq_rptr);
3927                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3928                        mqd->cp_hqd_pq_wptr_lo);
3929                 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3930                        mqd->cp_hqd_pq_wptr_hi);
3931         }
3932
3933         /* set the pointer to the MQD */
3934         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
3935                mqd->cp_mqd_base_addr_lo);
3936         WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
3937                mqd->cp_mqd_base_addr_hi);
3938
3939         /* set MQD vmid to 0 */
3940         WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
3941                mqd->cp_mqd_control);
3942
3943         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3944         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
3945                mqd->cp_hqd_pq_base_lo);
3946         WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
3947                mqd->cp_hqd_pq_base_hi);
3948
3949         /* set up the HQD, this is similar to CP_RB0_CNTL */
3950         WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
3951                mqd->cp_hqd_pq_control);
3952
3953         /* set the wb address whether it's enabled or not */
3954         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
3955                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3956         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3957                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3958
3959         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3960         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
3961                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3962         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3963                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3964
3965         /* enable the doorbell if requested */
3966         if (ring->use_doorbell) {
3967                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3968                         (adev->doorbell_index.kiq * 2) << 2);
3969                 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3970                         (adev->doorbell_index.userqueue_end * 2) << 2);
3971         }
3972
3973         WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
3974                mqd->cp_hqd_pq_doorbell_control);
3975
3976         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3977         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
3978                mqd->cp_hqd_pq_wptr_lo);
3979         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
3980                mqd->cp_hqd_pq_wptr_hi);
3981
3982         /* set the vmid for the queue */
3983         WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
3984
3985         WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
3986                mqd->cp_hqd_persistent_state);
3987
3988         /* activate the queue */
3989         WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
3990                mqd->cp_hqd_active);
3991
3992         if (ring->use_doorbell)
3993                 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3994
3995         return 0;
3996 }
3997
3998 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
3999 {
4000         struct amdgpu_device *adev = ring->adev;
4001         struct v11_compute_mqd *mqd = ring->mqd_ptr;
4002         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4003
4004         gfx_v11_0_kiq_setting(ring);
4005
4006         if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4007                 /* reset MQD to a clean status */
4008                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4009                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4010
4011                 /* reset ring buffer */
4012                 ring->wptr = 0;
4013                 amdgpu_ring_clear_ring(ring);
4014
4015                 mutex_lock(&adev->srbm_mutex);
4016                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4017                 gfx_v11_0_kiq_init_register(ring);
4018                 soc21_grbm_select(adev, 0, 0, 0, 0);
4019                 mutex_unlock(&adev->srbm_mutex);
4020         } else {
4021                 memset((void *)mqd, 0, sizeof(*mqd));
4022                 mutex_lock(&adev->srbm_mutex);
4023                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4024                 amdgpu_ring_init_mqd(ring);
4025                 gfx_v11_0_kiq_init_register(ring);
4026                 soc21_grbm_select(adev, 0, 0, 0, 0);
4027                 mutex_unlock(&adev->srbm_mutex);
4028
4029                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4030                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4031         }
4032
4033         return 0;
4034 }
4035
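     /*
      * Initialize (or, after a GPU reset, restore) the MQD for a kernel
      * compute queue; the queue itself is mapped to hardware later via the KIQ.
      */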
4036 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
4037 {
4038         struct amdgpu_device *adev = ring->adev;
4039         struct v11_compute_mqd *mqd = ring->mqd_ptr;
4040         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4041
4042         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4043                 memset((void *)mqd, 0, sizeof(*mqd));
4044                 mutex_lock(&adev->srbm_mutex);
4045                 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4046                 amdgpu_ring_init_mqd(ring);
4047                 soc21_grbm_select(adev, 0, 0, 0, 0);
4048                 mutex_unlock(&adev->srbm_mutex);
4049
4050                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4051                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4052         } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4053                 /* reset MQD to a clean status */
4054                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4055                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4056
4057                 /* reset ring buffer */
4058                 ring->wptr = 0;
4059                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4060                 amdgpu_ring_clear_ring(ring);
4061         } else {
4062                 amdgpu_ring_clear_ring(ring);
4063         }
4064
4065         return 0;
4066 }
4067
4068 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4069 {
4070         struct amdgpu_ring *ring;
4071         int r;
4072
4073         ring = &adev->gfx.kiq.ring;
4074
4075         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4076         if (unlikely(r != 0))
4077                 return r;
4078
4079         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4080         if (unlikely(r != 0)) {
4081                 amdgpu_bo_unreserve(ring->mqd_obj);
4082                 return r;
4083         }
4084
4085         gfx_v11_0_kiq_init_queue(ring);
4086         amdgpu_bo_kunmap(ring->mqd_obj);
4087         ring->mqd_ptr = NULL;
4088         amdgpu_bo_unreserve(ring->mqd_obj);
4089         ring->sched.ready = true;
4090         return 0;
4091 }
4092
4093 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4094 {
4095         struct amdgpu_ring *ring = NULL;
4096         int r = 0, i;
4097
4098         if (!amdgpu_async_gfx_ring)
4099                 gfx_v11_0_cp_compute_enable(adev, true);
4100
4101         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4102                 ring = &adev->gfx.compute_ring[i];
4103
4104                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4105                 if (unlikely(r != 0))
4106                         goto done;
4107                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4108                 if (!r) {
4109                         r = gfx_v11_0_kcq_init_queue(ring);
4110                         amdgpu_bo_kunmap(ring->mqd_obj);
4111                         ring->mqd_ptr = NULL;
4112                 }
4113                 amdgpu_bo_unreserve(ring->mqd_obj);
4114                 if (r)
4115                         goto done;
4116         }
4117
4118         r = amdgpu_gfx_enable_kcq(adev);
4119 done:
4120         return r;
4121 }
4122
4123 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4124 {
4125         int r, i;
4126         struct amdgpu_ring *ring;
4127
4128         if (!(adev->flags & AMD_IS_APU))
4129                 gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4130
4131         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4132                 /* legacy firmware loading */
4133                 r = gfx_v11_0_cp_gfx_load_microcode(adev);
4134                 if (r)
4135                         return r;
4136
4137                 if (adev->gfx.rs64_enable)
4138                         r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4139                 else
4140                         r = gfx_v11_0_cp_compute_load_microcode(adev);
4141                 if (r)
4142                         return r;
4143         }
4144
4145         gfx_v11_0_cp_set_doorbell_range(adev);
4146
4147         if (amdgpu_async_gfx_ring) {
4148                 gfx_v11_0_cp_compute_enable(adev, true);
4149                 gfx_v11_0_cp_gfx_enable(adev, true);
4150         }
4151
4152         if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4153                 r = amdgpu_mes_kiq_hw_init(adev);
4154         else
4155                 r = gfx_v11_0_kiq_resume(adev);
4156         if (r)
4157                 return r;
4158
4159         r = gfx_v11_0_kcq_resume(adev);
4160         if (r)
4161                 return r;
4162
4163         if (!amdgpu_async_gfx_ring) {
4164                 r = gfx_v11_0_cp_gfx_resume(adev);
4165                 if (r)
4166                         return r;
4167         } else {
4168                 r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4169                 if (r)
4170                         return r;
4171         }
4172
4173         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4174                 ring = &adev->gfx.gfx_ring[i];
4175                 r = amdgpu_ring_test_helper(ring);
4176                 if (r)
4177                         return r;
4178         }
4179
4180         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4181                 ring = &adev->gfx.compute_ring[i];
4182                 r = amdgpu_ring_test_helper(ring);
4183                 if (r)
4184                         return r;
4185         }
4186
4187         return 0;
4188 }
4189
4190 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4191 {
4192         gfx_v11_0_cp_gfx_enable(adev, enable);
4193         gfx_v11_0_cp_compute_enable(adev, enable);
4194 }
4195
4196 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4197 {
4198         int r;
4199         bool value;
4200
4201         r = adev->gfxhub.funcs->gart_enable(adev);
4202         if (r)
4203                 return r;
4204
4205         adev->hdp.funcs->flush_hdp(adev, NULL);
4206
4207         value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
4208                 false : true;
4209
4210         adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4211         amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
4212
4213         return 0;
4214 }
4215
4216 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4217 {
4218         u32 tmp;
4219
4220         /* select RS64 */
4221         if (adev->gfx.rs64_enable) {
4222                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4223                 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4224                 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4225
4226                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4227                 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4228                 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4229         }
4230
4231         if (amdgpu_emu_mode == 1)
4232                 msleep(100);
4233 }
4234
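     /*
      * Decode GB_ADDR_CONFIG into the cached gfx config fields (pipes,
      * packers, RBs, shader engines, pipe interleave size).
      */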
4235 static int get_gb_addr_config(struct amdgpu_device *adev)
4236 {
4237         u32 gb_addr_config;
4238
4239         gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4240         if (gb_addr_config == 0)
4241                 return -EINVAL;
4242
4243         adev->gfx.config.gb_addr_config_fields.num_pkrs =
4244                 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4245
4246         adev->gfx.config.gb_addr_config = gb_addr_config;
4247
4248         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4249                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4250                                       GB_ADDR_CONFIG, NUM_PIPES);
4251
4252         adev->gfx.config.max_tile_pipes =
4253                 adev->gfx.config.gb_addr_config_fields.num_pipes;
4254
4255         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4256                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4257                                       GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4258         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4259                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4260                                       GB_ADDR_CONFIG, NUM_RB_PER_SE);
4261         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4262                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4263                                       GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4264         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4265                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4266                                       GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4267
4268         return 0;
4269 }
4270
4271 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4272 {
4273         uint32_t data;
4274
4275         data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4276         data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4277         WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4278
4279         data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4280         data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4281         WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4282 }
4283
4284 static int gfx_v11_0_hw_init(void *handle)
4285 {
4286         int r;
4287         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4288
4289         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4290                 if (adev->gfx.imu.funcs) {
4291                         /* RLC autoload sequence 1: Program rlc ram */
4292                         if (adev->gfx.imu.funcs->program_rlc_ram)
4293                                 adev->gfx.imu.funcs->program_rlc_ram(adev);
4294                 }
4295                 /* rlc autoload firmware */
4296                 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4297                 if (r)
4298                         return r;
4299         } else {
4300                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4301                         if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4302                                 if (adev->gfx.imu.funcs->load_microcode)
4303                                         adev->gfx.imu.funcs->load_microcode(adev);
4304                                 if (adev->gfx.imu.funcs->setup_imu)
4305                                         adev->gfx.imu.funcs->setup_imu(adev);
4306                                 if (adev->gfx.imu.funcs->start_imu)
4307                                         adev->gfx.imu.funcs->start_imu(adev);
4308                         }
4309
4310                         /* disable gpa mode in backdoor loading */
4311                         gfx_v11_0_disable_gpa_mode(adev);
4312                 }
4313         }
4314
4315         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4316             (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4317                 r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4318                 if (r) {
4319                         dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
4320                         return r;
4321                 }
4322         }
4323
4324         adev->gfx.is_poweron = true;
4325
4326         if (get_gb_addr_config(adev))
4327                 DRM_WARN("Invalid gb_addr_config!\n");
4328
4329         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4330             adev->gfx.rs64_enable)
4331                 gfx_v11_0_config_gfx_rs64(adev);
4332
4333         r = gfx_v11_0_gfxhub_enable(adev);
4334         if (r)
4335                 return r;
4336
4337         if (!amdgpu_emu_mode)
4338                 gfx_v11_0_init_golden_registers(adev);
4339
4340         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4341             (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4342                 /*
4343                  * For gfx 11, RLC firmware loading depends on the SMU firmware
4344                  * having been loaded first, so for direct loading the SMC ucode
4345                  * has to be loaded here before the RLC.
4346                  */
4347                 if (!(adev->flags & AMD_IS_APU)) {
4348                         r = amdgpu_pm_load_smu_firmware(adev, NULL);
4349                         if (r)
4350                                 return r;
4351                 }
4352         }
4353
4354         gfx_v11_0_constants_init(adev);
4355
4356         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4357                 gfx_v11_0_select_cp_fw_arch(adev);
4358
4359         if (adev->nbio.funcs->gc_doorbell_init)
4360                 adev->nbio.funcs->gc_doorbell_init(adev);
4361
4362         r = gfx_v11_0_rlc_resume(adev);
4363         if (r)
4364                 return r;
4365
4366         /*
4367          * golden register init and rlc resume may override some registers,
4368          * so reconfigure them here
4369          */
4370         gfx_v11_0_tcp_harvest(adev);
4371
4372         r = gfx_v11_0_cp_resume(adev);
4373         if (r)
4374                 return r;
4375
4376         return r;
4377 }
4378
4379 #ifndef BRING_UP_DEBUG
4380 static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev)
4381 {
4382         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4383         struct amdgpu_ring *kiq_ring = &kiq->ring;
4384         int i, r = 0;
4385
4386         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
4387                 return -EINVAL;
4388
4389         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
4390                                         adev->gfx.num_gfx_rings))
4391                 return -ENOMEM;
4392
4393         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4394                 kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
4395                                            PREEMPT_QUEUES, 0, 0);
4396
4397         if (adev->gfx.kiq.ring.sched.ready)
4398                 r = amdgpu_ring_test_helper(kiq_ring);
4399
4400         return r;
4401 }
4402 #endif
4403
4404 static int gfx_v11_0_hw_fini(void *handle)
4405 {
4406         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4407         int r;
4408         uint32_t tmp;
4409
4410         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4411         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4412
4413         if (!adev->no_hw_access) {
4414 #ifndef BRING_UP_DEBUG
4415                 if (amdgpu_async_gfx_ring) {
4416                         r = gfx_v11_0_kiq_disable_kgq(adev);
4417                         if (r)
4418                                 DRM_ERROR("KGQ disable failed\n");
4419                 }
4420 #endif
4421                 if (amdgpu_gfx_disable_kcq(adev))
4422                         DRM_ERROR("KCQ disable failed\n");
4423
4424                 amdgpu_mes_kiq_hw_fini(adev);
4425         }
4426
4427         if (amdgpu_sriov_vf(adev)) {
4428                 gfx_v11_0_cp_gfx_enable(adev, false);
4429                 /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
4430                 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
4431                 tmp &= 0xffffff00;
4432                 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
4433
4434                 return 0;
4435         }
4436         gfx_v11_0_cp_enable(adev, false);
4437         gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4438
4439         adev->gfxhub.funcs->gart_disable(adev);
4440
4441         adev->gfx.is_poweron = false;
4442
4443         return 0;
4444 }
4445
4446 static int gfx_v11_0_suspend(void *handle)
4447 {
4448         return gfx_v11_0_hw_fini(handle);
4449 }
4450
4451 static int gfx_v11_0_resume(void *handle)
4452 {
4453         return gfx_v11_0_hw_init(handle);
4454 }
4455
4456 static bool gfx_v11_0_is_idle(void *handle)
4457 {
4458         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4459
4460         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
4461                                 GRBM_STATUS, GUI_ACTIVE))
4462                 return false;
4463         else
4464                 return true;
4465 }
4466
4467 static int gfx_v11_0_wait_for_idle(void *handle)
4468 {
4469         unsigned i;
4470         u32 tmp;
4471         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4472
4473         for (i = 0; i < adev->usec_timeout; i++) {
4474                 /* read GRBM_STATUS */
4475                 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
4476                         GRBM_STATUS__GUI_ACTIVE_MASK;
4477
4478                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4479                         return 0;
4480                 udelay(1);
4481         }
4482         return -ETIMEDOUT;
4483 }
4484
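     /*
      * Soft reset sequence: mask CP interrupts, dequeue every compute and gfx
      * HQD, wait for the pipes to go idle, pulse the CP soft-reset bits in
      * GRBM_SOFT_RESET, then restart the CP rings via gfx_v11_0_cp_resume().
      */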
4485 static int gfx_v11_0_soft_reset(void *handle)
4486 {
4487         u32 grbm_soft_reset = 0;
4488         u32 tmp;
4489         int i, j, k;
4490         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4491
4492         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4493         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
4494         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
4495         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
4496         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
4497         WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4498
4499         gfx_v11_0_set_safe_mode(adev);
4500
4501         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4502                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4503                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4504                                 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4505                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4506                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4507                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4508                                 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4509
4510                                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
4511                                 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
4512                         }
4513                 }
4514         }
4515         for (i = 0; i < adev->gfx.me.num_me; ++i) {
4516                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
4517                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
4518                                 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4519                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4520                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4521                                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4522                                 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4523
4524                                 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
4525                         }
4526                 }
4527         }
4528
4529         WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
4530
4531         // Read the CP_VMID_RESET register three times to allow
4532         // sufficient time for GFX_HQD_ACTIVE to reach 0
4533         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4534         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4535         RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4536
4537         for (i = 0; i < adev->usec_timeout; i++) {
4538                 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
4539                     !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
4540                         break;
4541                 udelay(1);
4542         }
4543         if (i >= adev->usec_timeout) {
4544                 printk("Failed to wait for all pipes to go idle\n");
4545                 return -EINVAL;
4546         }
4547
4548         /**********  trigger soft reset  ***********/
4549         grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4550         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4551                                         SOFT_RESET_CP, 1);
4552         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4553                                         SOFT_RESET_GFX, 1);
4554         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4555                                         SOFT_RESET_CPF, 1);
4556         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4557                                         SOFT_RESET_CPC, 1);
4558         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4559                                         SOFT_RESET_CPG, 1);
4560         WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4561         /**********  exit soft reset  ***********/
4562         grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4563         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4564                                         SOFT_RESET_CP, 0);
4565         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4566                                         SOFT_RESET_GFX, 0);
4567         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4568                                         SOFT_RESET_CPF, 0);
4569         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4570                                         SOFT_RESET_CPC, 0);
4571         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4572                                         SOFT_RESET_CPG, 0);
4573         WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4574
4575         tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
4576         tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
4577         WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
4578
4579         WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
4580         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
4581
4582         for (i = 0; i < adev->usec_timeout; i++) {
4583                 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
4584                         break;
4585                 udelay(1);
4586         }
4587         if (i >= adev->usec_timeout) {
4588                 printk("Failed to wait for CP_VMID_RESET to clear\n");
4589                 return -EINVAL;
4590         }
4591
4592         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4593         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4594         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4595         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4596         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4597         WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4598
4599         gfx_v11_0_unset_safe_mode(adev);
4600
4601         return gfx_v11_0_cp_resume(adev);
4602 }
4603
4604 static bool gfx_v11_0_check_soft_reset(void *handle)
4605 {
4606         int i, r;
4607         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4608         struct amdgpu_ring *ring;
4609         long tmo = msecs_to_jiffies(1000);
4610
4611         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4612                 ring = &adev->gfx.gfx_ring[i];
4613                 r = amdgpu_ring_test_ib(ring, tmo);
4614                 if (r)
4615                         return true;
4616         }
4617
4618         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4619                 ring = &adev->gfx.compute_ring[i];
4620                 r = amdgpu_ring_test_ib(ring, tmo);
4621                 if (r)
4622                         return true;
4623         }
4624
4625         return false;
4626 }
4627
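     /*
      * Read the 64-bit GOLDEN_TSC counter from its two 32-bit SMUIO halves;
      * GFXOFF is disabled around the read so the registers remain accessible.
      */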
4628 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4629 {
4630         uint64_t clock;
4631
4632         amdgpu_gfx_off_ctrl(adev, false);
4633         mutex_lock(&adev->gfx.gpu_clock_mutex);
4634         clock = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER) |
4635                 ((uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER) << 32ULL);
4636         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4637         amdgpu_gfx_off_ctrl(adev, true);
4638         return clock;
4639 }
4640
4641 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4642                                            uint32_t vmid,
4643                                            uint32_t gds_base, uint32_t gds_size,
4644                                            uint32_t gws_base, uint32_t gws_size,
4645                                            uint32_t oa_base, uint32_t oa_size)
4646 {
4647         struct amdgpu_device *adev = ring->adev;
4648
4649         /* GDS Base */
4650         gfx_v11_0_write_data_to_reg(ring, 0, false,
4651                                     SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
4652                                     gds_base);
4653
4654         /* GDS Size */
4655         gfx_v11_0_write_data_to_reg(ring, 0, false,
4656                                     SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
4657                                     gds_size);
4658
4659         /* GWS */
4660         gfx_v11_0_write_data_to_reg(ring, 0, false,
4661                                     SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
4662                                     gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4663
4664         /* OA */
4665         gfx_v11_0_write_data_to_reg(ring, 0, false,
4666                                     SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
4667                                     (1 << (oa_size + oa_base)) - (1 << oa_base));
4668 }
4669
4670 static int gfx_v11_0_early_init(void *handle)
4671 {
4672         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4673
4674         adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
4675         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4676                                           AMDGPU_MAX_COMPUTE_RINGS);
4677
4678         gfx_v11_0_set_kiq_pm4_funcs(adev);
4679         gfx_v11_0_set_ring_funcs(adev);
4680         gfx_v11_0_set_irq_funcs(adev);
4681         gfx_v11_0_set_gds_init(adev);
4682         gfx_v11_0_set_rlc_funcs(adev);
4683         gfx_v11_0_set_mqd_funcs(adev);
4684         gfx_v11_0_set_imu_funcs(adev);
4685
4686         gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
4687
4688         return 0;
4689 }
4690
4691 static int gfx_v11_0_late_init(void *handle)
4692 {
4693         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4694         int r;
4695
4696         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4697         if (r)
4698                 return r;
4699
4700         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4701         if (r)
4702                 return r;
4703
4704         return 0;
4705 }
4706
4707 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
4708 {
4709         uint32_t rlc_cntl;
4710
4711         /* if RLC is not enabled, do nothing */
4712         rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
4713         return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
4714 }
4715
4716 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev)
4717 {
4718         uint32_t data;
4719         unsigned i;
4720
4721         data = RLC_SAFE_MODE__CMD_MASK;
4722         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4723
4724         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
4725
4726         /* wait for RLC_SAFE_MODE */
4727         for (i = 0; i < adev->usec_timeout; i++) {
4728                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
4729                                    RLC_SAFE_MODE, CMD))
4730                         break;
4731                 udelay(1);
4732         }
4733 }
4734
4735 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev)
4736 {
4737         WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
4738 }
4739
4740 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
4741                                       bool enable)
4742 {
4743         uint32_t def, data;
4744
4745         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
4746                 return;
4747
4748         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4749
4750         if (enable)
4751                 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4752         else
4753                 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4754
4755         if (def != data)
4756                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4757 }
4758
4759 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
4760                                        bool enable)
4761 {
4762         uint32_t def, data;
4763
4764         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
4765                 return;
4766
4767         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4768
4769         if (enable)
4770                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4771         else
4772                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4773
4774         if (def != data)
4775                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4776 }
4777
4778 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
4779                                            bool enable)
4780 {
4781         uint32_t def, data;
4782
4783         if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
4784                 return;
4785
4786         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4787
4788         if (enable)
4789                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4790         else
4791                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4792
4793         if (def != data)
4794                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4795 }
4796
4797 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4798                                                        bool enable)
4799 {
4800         uint32_t data, def;
4801
4802         if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
4803                 return;
4804
4805         /* It is disabled by HW by default */
4806         if (enable) {
4807                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4808                         /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4809                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4810
4811                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4812                                   RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4813                                   RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4814
4815                         if (def != data)
4816                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4817                 }
4818         } else {
4819                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4820                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4821
4822                         data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4823                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4824                                  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4825
4826                         if (def != data)
4827                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4828                 }
4829         }
4830 }
4831
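     /*
      * Coarse-grain clock gating (CGCG/CGLS, including the 3D variants):
      * clear or set the RLC override bits, program the CGCG FSM thresholds,
      * and enable the CP/SDMA idle interrupts that drive the FSM.
      */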
4832 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4833                                                        bool enable)
4834 {
4835         uint32_t def, data;
4836
4837         if (!(adev->cg_flags &
4838               (AMD_CG_SUPPORT_GFX_CGCG |
4839               AMD_CG_SUPPORT_GFX_CGLS |
4840               AMD_CG_SUPPORT_GFX_3D_CGCG |
4841               AMD_CG_SUPPORT_GFX_3D_CGLS)))
4842                 return;
4843
4844         if (enable) {
4845                 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4846
4847                 /* unset CGCG override */
4848                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4849                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4850                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4851                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4852                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
4853                     adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4854                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4855
4856                 /* update CGCG override bits */
4857                 if (def != data)
4858                         WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4859
4860                 /* enable cgcg FSM(0x0000363F) */
4861                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4862
4863                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
4864                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
4865                         data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4866                                  RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4867                 }
4868
4869                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
4870                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
4871                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4872                                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4873                 }
4874
4875                 if (def != data)
4876                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4877
4878                 /* Program RLC_CGCG_CGLS_CTRL_3D */
4879                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4880
4881                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
4882                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
4883                         data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4884                                  RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4885                 }
4886
4887                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
4888                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
4889                         data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4890                                  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4891                 }
4892
4893                 if (def != data)
4894                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4895
4896                 /* set IDLE_POLL_COUNT(0x00900100) */
4897                 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
4898
4899                 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
4900                 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4901                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4902
4903                 if (def != data)
4904                         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
4905
4906                 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4907                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4908                 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4909                 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4910                 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4911                 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
4912
4913                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4914                 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4915                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4916
4917                 /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
4918                 if (adev->sdma.num_instances > 1) {
4919                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4920                         data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
4921                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4922                 }
4923         } else {
4924                 /* Program RLC_CGCG_CGLS_CTRL */
4925                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4926
4927                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4928                         data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4929
4930                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4931                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4932
4933                 if (def != data)
4934                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
4935
4936                 /* Program RLC_CGCG_CGLS_CTRL_3D */
4937                 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
4938
4939                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4940                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4941                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4942                         data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4943
4944                 if (def != data)
4945                         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
4946
4947                 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
4948                 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4949                 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
4950
4951                 /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
4952                 if (adev->sdma.num_instances > 1) {
4953                         data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
4954                         data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
4955                         WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
4956                 }
4957         }
4958 }
4959
4960 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4961                                             bool enable)
4962 {
4963         amdgpu_gfx_rlc_enter_safe_mode(adev);
4964
4965         gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
4966
4967         gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
4968
4969         gfx_v11_0_update_repeater_fgcg(adev, enable);
4970
4971         gfx_v11_0_update_sram_fgcg(adev, enable);
4972
4973         gfx_v11_0_update_perf_clk(adev, enable);
4974
4975         if (adev->cg_flags &
4976             (AMD_CG_SUPPORT_GFX_MGCG |
4977              AMD_CG_SUPPORT_GFX_CGLS |
4978              AMD_CG_SUPPORT_GFX_CGCG |
4979              AMD_CG_SUPPORT_GFX_3D_CGCG |
4980              AMD_CG_SUPPORT_GFX_3D_CGLS))
4981                 gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
4982
4983         amdgpu_gfx_rlc_exit_safe_mode(adev);
4984
4985         return 0;
4986 }
4987
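     /*
      * Route RLC SPM (streaming performance monitor) traffic to the given
      * VMID; GFXOFF is disabled around the register update.
      */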
4988 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
4989 {
4990         u32 reg, data;
4991
4992         amdgpu_gfx_off_ctrl(adev, false);
4993
4994         reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
4995         if (amdgpu_sriov_is_pp_one_vf(adev))
4996                 data = RREG32_NO_KIQ(reg);
4997         else
4998                 data = RREG32(reg);
4999
5000         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5001         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5002
5003         if (amdgpu_sriov_is_pp_one_vf(adev))
5004                 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
5005         else
5006                 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
5007
5008         amdgpu_gfx_off_ctrl(adev, true);
5009 }
5010
5011 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5012         .is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5013         .set_safe_mode = gfx_v11_0_set_safe_mode,
5014         .unset_safe_mode = gfx_v11_0_unset_safe_mode,
5015         .init = gfx_v11_0_rlc_init,
5016         .get_csb_size = gfx_v11_0_get_csb_size,
5017         .get_csb_buffer = gfx_v11_0_get_csb_buffer,
5018         .resume = gfx_v11_0_rlc_resume,
5019         .stop = gfx_v11_0_rlc_stop,
5020         .reset = gfx_v11_0_rlc_reset,
5021         .start = gfx_v11_0_rlc_start,
5022         .update_spm_vmid = gfx_v11_0_update_spm_vmid,
5023 };
5024
5025 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5026 {
5027         u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5028
5029         if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5030                 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5031         else
5032                 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5033
5034         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5035
5036         // Program RLC_PG_DELAY_3 for CGPG hysteresis
5037         if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5038                 switch (adev->ip_versions[GC_HWIP][0]) {
5039                 case IP_VERSION(11, 0, 1):
5040                 case IP_VERSION(11, 0, 4):
5041                         WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5042                         break;
5043                 default:
5044                         break;
5045                 }
5046         }
5047 }
5048
5049 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5050 {
5051         amdgpu_gfx_rlc_enter_safe_mode(adev);
5052
5053         gfx_v11_cntl_power_gating(adev, enable);
5054
5055         amdgpu_gfx_rlc_exit_safe_mode(adev);
5056 }
5057
5058 static int gfx_v11_0_set_powergating_state(void *handle,
5059                                            enum amd_powergating_state state)
5060 {
5061         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5062         bool enable = (state == AMD_PG_STATE_GATE);
5063
5064         if (amdgpu_sriov_vf(adev))
5065                 return 0;
5066
5067         switch (adev->ip_versions[GC_HWIP][0]) {
5068         case IP_VERSION(11, 0, 0):
5069         case IP_VERSION(11, 0, 2):
5070         case IP_VERSION(11, 0, 3):
5071                 amdgpu_gfx_off_ctrl(adev, enable);
5072                 break;
5073         case IP_VERSION(11, 0, 1):
5074         case IP_VERSION(11, 0, 4):
5075                 gfx_v11_cntl_pg(adev, enable);
5076                 amdgpu_gfx_off_ctrl(adev, enable);
5077                 break;
5078         default:
5079                 break;
5080         }
5081
5082         return 0;
5083 }
5084
5085 static int gfx_v11_0_set_clockgating_state(void *handle,
5086                                           enum amd_clockgating_state state)
5087 {
5088         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5089
5090         if (amdgpu_sriov_vf(adev))
5091                 return 0;
5092
5093         switch (adev->ip_versions[GC_HWIP][0]) {
5094         case IP_VERSION(11, 0, 0):
5095         case IP_VERSION(11, 0, 1):
5096         case IP_VERSION(11, 0, 2):
5097         case IP_VERSION(11, 0, 3):
5098         case IP_VERSION(11, 0, 4):
5099                 gfx_v11_0_update_gfx_clock_gating(adev,
5100                                 state == AMD_CG_STATE_GATE);
5101                 break;
5102         default:
5103                 break;
5104         }
5105
5106         return 0;
5107 }
5108
5109 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
5110 {
5111         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5112         int data;
5113
5114         /* AMD_CG_SUPPORT_GFX_MGCG */
5115         data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5116         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5117                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5118
5119         /* AMD_CG_SUPPORT_REPEATER_FGCG */
5120         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5121                 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5122
5123         /* AMD_CG_SUPPORT_GFX_FGCG */
5124         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5125                 *flags |= AMD_CG_SUPPORT_GFX_FGCG;
5126
5127         /* AMD_CG_SUPPORT_GFX_PERF_CLK */
5128         if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5129                 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5130
5131         /* AMD_CG_SUPPORT_GFX_CGCG */
5132         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5133         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5134                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5135
5136         /* AMD_CG_SUPPORT_GFX_CGLS */
5137         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5138                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5139
5140         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5141         data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5142         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5143                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5144
5145         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5146         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5147                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5148 }
5149
5150 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5151 {
5152         /* gfx11 uses a 32-bit rptr */
5153         return *(uint32_t *)ring->rptr_cpu_addr;
5154 }
5155
5156 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5157 {
5158         struct amdgpu_device *adev = ring->adev;
5159         u64 wptr;
5160
5161         /* XXX check if swapping is necessary on BE */
5162         if (ring->use_doorbell) {
5163                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5164         } else {
5165                 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5166                 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5167         }
5168
5169         return wptr;
5170 }
5171
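/*
 * Commit the gfx ring write pointer. For MES-managed queues the wptr is
 * mirrored into the shadow slot right after the MQD and into wptr_cpu_addr;
 * if MES has unmapped the queue, the aggregated doorbell for the ring's
 * priority is rung in addition to the queue's own doorbell so the scheduler
 * re-maps it. Legacy queues either use the doorbell or fall back to the
 * CP_RB0_WPTR/CP_RB0_WPTR_HI registers.
 */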
5172 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5173 {
5174         struct amdgpu_device *adev = ring->adev;
5175         uint32_t *wptr_saved;
5176         uint32_t *is_queue_unmap;
5177         uint64_t aggregated_db_index;
5178         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
5179         uint64_t wptr_tmp;
5180
5181         if (ring->is_mes_queue) {
5182                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5183                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5184                                               sizeof(uint32_t));
5185                 aggregated_db_index =
5186                         amdgpu_mes_get_aggregated_doorbell_index(adev,
5187                                                                  ring->hw_prio);
5188
5189                 wptr_tmp = ring->wptr & ring->buf_mask;
5190                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5191                 *wptr_saved = wptr_tmp;
5192                 /* assume the doorbell is always used by MES-mapped queues */
5193                 if (*is_queue_unmap) {
5194                         WDOORBELL64(aggregated_db_index, wptr_tmp);
5195                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5196                 } else {
5197                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5198
5199                         if (*is_queue_unmap)
5200                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
5201                 }
5202         } else {
5203                 if (ring->use_doorbell) {
5204                         /* XXX check if swapping is necessary on BE */
5205                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5206                                      ring->wptr);
5207                         WDOORBELL64(ring->doorbell_index, ring->wptr);
5208                 } else {
5209                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5210                                      lower_32_bits(ring->wptr));
5211                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5212                                      upper_32_bits(ring->wptr));
5213                 }
5214         }
5215 }
5216
5217 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5218 {
5219         /* gfx11 hardware uses a 32-bit rptr */
5220         return *(uint32_t *)ring->rptr_cpu_addr;
5221 }
5222
5223 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5224 {
5225         u64 wptr;
5226
5227         /* XXX check if swapping is necessary on BE */
5228         if (ring->use_doorbell)
5229                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5230         else
5231                 BUG();
5232         return wptr;
5233 }
5234
5235 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5236 {
5237         struct amdgpu_device *adev = ring->adev;
5238         uint32_t *wptr_saved;
5239         uint32_t *is_queue_unmap;
5240         uint64_t aggregated_db_index;
5241         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
5242         uint64_t wptr_tmp;
5243
5244         if (ring->is_mes_queue) {
5245                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5246                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5247                                               sizeof(uint32_t));
5248                 aggregated_db_index =
5249                         amdgpu_mes_get_aggregated_doorbell_index(adev,
5250                                                                  ring->hw_prio);
5251
5252                 wptr_tmp = ring->wptr & ring->buf_mask;
5253                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5254                 *wptr_saved = wptr_tmp;
5255                 /* assume the doorbell is always used by MES-mapped queues */
5256                 if (*is_queue_unmap) {
5257                         WDOORBELL64(aggregated_db_index, wptr_tmp);
5258                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5259                 } else {
5260                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
5261
5262                         if (*is_queue_unmap)
5263                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
5264                 }
5265         } else {
5266                 /* XXX check if swapping is necessary on BE */
5267                 if (ring->use_doorbell) {
5268                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5269                                      ring->wptr);
5270                         WDOORBELL64(ring->doorbell_index, ring->wptr);
5271                 } else {
5272                         BUG(); /* only the doorbell method is supported on gfx11 for now */
5273                 }
5274         }
5275 }
5276
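/*
 * Flush the HDP write queue from the ring: emit a WAIT_REG_MEM in
 * write-then-poll mode that writes the NBIO HDP flush request register and
 * waits on the matching done register, using the per-engine ref/mask
 * (CP0 for gfx via the PFP engine, CP2/CP6 shifted by pipe for the two
 * compute MEs).
 */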
5277 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5278 {
5279         struct amdgpu_device *adev = ring->adev;
5280         u32 ref_and_mask, reg_mem_engine;
5281         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5282
5283         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5284                 switch (ring->me) {
5285                 case 1:
5286                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5287                         break;
5288                 case 2:
5289                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5290                         break;
5291                 default:
5292                         return;
5293                 }
5294                 reg_mem_engine = 0;
5295         } else {
5296                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5297                 reg_mem_engine = 1; /* pfp */
5298         }
5299
5300         gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5301                                adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5302                                adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5303                                ref_and_mask, ref_and_mask, 0x20);
5304 }
5305
5306 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5307                                        struct amdgpu_job *job,
5308                                        struct amdgpu_ib *ib,
5309                                        uint32_t flags)
5310 {
5311         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5312         u32 header, control = 0;
5313
5314         BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
5315
5316         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5317
5318         control |= ib->length_dw | (vmid << 24);
5319
5320         if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5321                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5322
5323                 if (flags & AMDGPU_IB_PREEMPTED)
5324                         control |= INDIRECT_BUFFER_PRE_RESUME(1);
5325
5326                 if (vmid)
5327                         gfx_v11_0_ring_emit_de_meta(ring,
5328                                     !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
5329         }
5330
5331         if (ring->is_mes_queue)
5332                 /* inherit vmid from mqd */
5333                 control |= 0x400000;
5334
5335         amdgpu_ring_write(ring, header);
5336         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5337         amdgpu_ring_write(ring,
5338 #ifdef __BIG_ENDIAN
5339                 (2 << 0) |
5340 #endif
5341                 lower_32_bits(ib->gpu_addr));
5342         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5343         amdgpu_ring_write(ring, control);
5344 }
5345
5346 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5347                                            struct amdgpu_job *job,
5348                                            struct amdgpu_ib *ib,
5349                                            uint32_t flags)
5350 {
5351         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5352         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5353
5354         if (ring->is_mes_queue)
5355                 /* inherit vmid from mqd */
5356                 control |= 0x40000000;
5357
5358         /* Currently, there is a high probability of a wave ID mismatch
5359          * between ME and GDS, leading to a hw deadlock, because ME generates
5360          * different wave IDs than the GDS expects. This situation happens
5361          * randomly when at least 5 compute pipes use GDS ordered append.
5362          * The wave IDs generated by ME are also wrong after suspend/resume.
5363          * Those are probably bugs somewhere else in the kernel driver.
5364          *
5365          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5366          * GDS to 0 for this ring (me/pipe).
5367          */
5368         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5369                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5370                 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5371                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5372         }
5373
5374         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5375         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5376         amdgpu_ring_write(ring,
5377 #ifdef __BIG_ENDIAN
5378                                 (2 << 0) |
5379 #endif
5380                                 lower_32_bits(ib->gpu_addr));
5381         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5382         amdgpu_ring_write(ring, control);
5383 }
5384
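/*
 * Emit an end-of-pipe fence: a RELEASE_MEM packet that writes back and
 * invalidates the GL2/metadata caches and invalidates the L0/L1 caches on
 * the CACHE_FLUSH_AND_INV_TS event, then writes the 32- or 64-bit sequence
 * to @addr and optionally raises an interrupt. For MES queues the trailing
 * DW carries the hw queue id so the EOP handler can route the fence.
 */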
5385 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5386                                      u64 seq, unsigned flags)
5387 {
5388         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5389         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5390
5391         /* RELEASE_MEM - flush caches, send int */
5392         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5393         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5394                                  PACKET3_RELEASE_MEM_GCR_GL2_WB |
5395                                  PACKET3_RELEASE_MEM_GCR_GL2_INV |
5396                                  PACKET3_RELEASE_MEM_GCR_GL2_US |
5397                                  PACKET3_RELEASE_MEM_GCR_GL1_INV |
5398                                  PACKET3_RELEASE_MEM_GCR_GLV_INV |
5399                                  PACKET3_RELEASE_MEM_GCR_GLM_INV |
5400                                  PACKET3_RELEASE_MEM_GCR_GLM_WB |
5401                                  PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5402                                  PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5403                                  PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
5404         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5405                                  PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5406
5407         /*
5408          * The address must be Qword aligned for a 64-bit write, and Dword
5409          * aligned when only the low 32-bit data is sent (high data discarded).
5410          */
5411         if (write64bit)
5412                 BUG_ON(addr & 0x7);
5413         else
5414                 BUG_ON(addr & 0x3);
5415         amdgpu_ring_write(ring, lower_32_bits(addr));
5416         amdgpu_ring_write(ring, upper_32_bits(addr));
5417         amdgpu_ring_write(ring, lower_32_bits(seq));
5418         amdgpu_ring_write(ring, upper_32_bits(seq));
5419         amdgpu_ring_write(ring, ring->is_mes_queue ?
5420                          (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
5421 }
5422
5423 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5424 {
5425         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5426         uint32_t seq = ring->fence_drv.sync_seq;
5427         uint64_t addr = ring->fence_drv.gpu_addr;
5428
5429         gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5430                                upper_32_bits(addr), seq, 0xffffffff, 4);
5431 }
5432
5433 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5434                                    uint16_t pasid, uint32_t flush_type,
5435                                    bool all_hub, uint8_t dst_sel)
5436 {
5437         amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5438         amdgpu_ring_write(ring,
5439                           PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5440                           PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5441                           PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5442                           PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5443 }
5444
5445 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5446                                          unsigned vmid, uint64_t pd_addr)
5447 {
5448         if (ring->is_mes_queue)
5449                 gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5450         else
5451                 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5452
5453         /* compute doesn't have PFP */
5454         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5455                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5456                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5457                 amdgpu_ring_write(ring, 0x0);
5458         }
5459 }
5460
5461 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5462                                           u64 seq, unsigned int flags)
5463 {
5464         struct amdgpu_device *adev = ring->adev;
5465
5466         /* we only allocate 32bit for each seq wb address */
5467         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5468
5469         /* write fence seq to the "addr" */
5470         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5471         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5472                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5473         amdgpu_ring_write(ring, lower_32_bits(addr));
5474         amdgpu_ring_write(ring, upper_32_bits(addr));
5475         amdgpu_ring_write(ring, lower_32_bits(seq));
5476
5477         if (flags & AMDGPU_FENCE_FLAG_INT) {
5478                 /* set register to trigger INT */
5479                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5480                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5481                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5482                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
5483                 amdgpu_ring_write(ring, 0);
5484                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5485         }
5486 }
5487
5488 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
5489                                          uint32_t flags)
5490 {
5491         uint32_t dw2 = 0;
5492
5493         dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
5494         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5495                 /* set load_global_config & load_global_uconfig */
5496                 dw2 |= 0x8001;
5497                 /* set load_cs_sh_regs */
5498                 dw2 |= 0x01000000;
5499                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5500                 dw2 |= 0x10002;
5501         }
5502
5503         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5504         amdgpu_ring_write(ring, dw2);
5505         amdgpu_ring_write(ring, 0);
5506 }
5507
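/*
 * Emit a COND_EXEC packet against ring->cond_exe_gpu_addr: the following
 * DWs are skipped when the condition memory reads zero. The DW count is
 * emitted as a 0x55aa55aa placeholder and its ring offset is returned so
 * gfx_v11_0_ring_emit_patch_cond_exec() can fill in the real count once
 * the protected section has been emitted.
 */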
5508 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5509 {
5510         unsigned ret;
5511
5512         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5513         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5514         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5515         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5516         ret = ring->wptr & ring->buf_mask;
5517         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5518
5519         return ret;
5520 }
5521
5522 static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5523 {
5524         unsigned cur;
5525         BUG_ON(offset > ring->buf_mask);
5526         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5527
5528         cur = (ring->wptr - 1) & ring->buf_mask;
5529         if (likely(cur > offset))
5530                 ring->ring[offset] = cur - offset;
5531         else
5532                 ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
5533 }
5534
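/*
 * Preempt the IBs currently running on @ring: with the KIQ ring lock held,
 * ask the KIQ to unmap the queue with PREEMPT_QUEUES_NO_UNMAP so the CP
 * writes a trailing fence when preemption completes, then poll that fence
 * for up to adev->usec_timeout microseconds. The ring's preempt cond-exec
 * value is toggled off before the request and restored afterwards.
 */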
5535 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
5536 {
5537         int i, r = 0;
5538         struct amdgpu_device *adev = ring->adev;
5539         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5540         struct amdgpu_ring *kiq_ring = &kiq->ring;
5541         unsigned long flags;
5542
5543         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5544                 return -EINVAL;
5545
5546         spin_lock_irqsave(&kiq->ring_lock, flags);
5547
5548         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5549                 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5550                 return -ENOMEM;
5551         }
5552
5553         /* assert preemption condition */
5554         amdgpu_ring_set_preempt_cond_exec(ring, false);
5555
5556         /* assert IB preemption, emit the trailing fence */
5557         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5558                                    ring->trail_fence_gpu_addr,
5559                                    ++ring->trail_seq);
5560         amdgpu_ring_commit(kiq_ring);
5561
5562         spin_unlock_irqrestore(&kiq->ring_lock, flags);
5563
5564         /* poll the trailing fence */
5565         for (i = 0; i < adev->usec_timeout; i++) {
5566                 if (ring->trail_seq ==
5567                     le32_to_cpu(*(ring->trail_fence_cpu_addr)))
5568                         break;
5569                 udelay(1);
5570         }
5571
5572         if (i >= adev->usec_timeout) {
5573                 r = -EINVAL;
5574                 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
5575         }
5576
5577         /* deassert preemption condition */
5578         amdgpu_ring_set_preempt_cond_exec(ring, true);
5579         return r;
5580 }
5581
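/*
 * Write the draw-engine (DE) metadata used for mid-command-buffer
 * preemption. The payload (currently just the GDS backup address) is
 * written with WRITE_DATA into the DE payload slot of the CSA, or into the
 * MES context buffer for MES queues; on resume the previously saved
 * CPU-side copy of the payload is re-emitted instead.
 */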
5582 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
5583 {
5584         struct amdgpu_device *adev = ring->adev;
5585         struct v10_de_ib_state de_payload = {0};
5586         uint64_t offset, gds_addr, de_payload_gpu_addr;
5587         void *de_payload_cpu_addr;
5588         int cnt;
5589
5590         if (ring->is_mes_queue) {
5591                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5592                                   gfx[0].gfx_meta_data) +
5593                         offsetof(struct v10_gfx_meta_data, de_payload);
5594                 de_payload_gpu_addr =
5595                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5596                 de_payload_cpu_addr =
5597                         amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5598
5599                 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5600                                   gfx[0].gds_backup) +
5601                         offsetof(struct v10_gfx_meta_data, de_payload);
5602                 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5603         } else {
5604                 offset = offsetof(struct v10_gfx_meta_data, de_payload);
5605                 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5606                 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5607
5608                 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5609                                  AMDGPU_CSA_SIZE - adev->gds.gds_size,
5610                                  PAGE_SIZE);
5611         }
5612
5613         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5614         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5615
5616         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5617         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5618         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5619                                  WRITE_DATA_DST_SEL(8) |
5620                                  WR_CONFIRM) |
5621                                  WRITE_DATA_CACHE_POLICY(0));
5622         amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5623         amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5624
5625         if (resume)
5626                 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5627                                            sizeof(de_payload) >> 2);
5628         else
5629                 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5630                                            sizeof(de_payload) >> 2);
5631 }
5632
5633 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5634                                     bool secure)
5635 {
5636         uint32_t v = secure ? FRAME_TMZ : 0;
5637
5638         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5639         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5640 }
5641
5642 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5643                                      uint32_t reg_val_offs)
5644 {
5645         struct amdgpu_device *adev = ring->adev;
5646
5647         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5648         amdgpu_ring_write(ring, 0 |     /* src: register*/
5649                                 (5 << 8) |      /* dst: memory */
5650                                 (1 << 20));     /* write confirm */
5651         amdgpu_ring_write(ring, reg);
5652         amdgpu_ring_write(ring, 0);
5653         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5654                                 reg_val_offs * 4));
5655         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5656                                 reg_val_offs * 4));
5657 }
5658
5659 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5660                                    uint32_t val)
5661 {
5662         uint32_t cmd = 0;
5663
5664         switch (ring->funcs->type) {
5665         case AMDGPU_RING_TYPE_GFX:
5666                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5667                 break;
5668         case AMDGPU_RING_TYPE_KIQ:
5669                 cmd = (1 << 16); /* no inc addr */
5670                 break;
5671         default:
5672                 cmd = WR_CONFIRM;
5673                 break;
5674         }
5675         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5676         amdgpu_ring_write(ring, cmd);
5677         amdgpu_ring_write(ring, reg);
5678         amdgpu_ring_write(ring, 0);
5679         amdgpu_ring_write(ring, val);
5680 }
5681
5682 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5683                                         uint32_t val, uint32_t mask)
5684 {
5685         gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5686 }
5687
5688 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5689                                                    uint32_t reg0, uint32_t reg1,
5690                                                    uint32_t ref, uint32_t mask)
5691 {
5692         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5693
5694         gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5695                                ref, mask, 0x20);
5696 }
5697
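/*
 * Soft recovery for a hung job: issue an SQ_CMD targeting the waves that
 * belong to the given VMID (CHECK_VMID set) so the offending waves are
 * killed without a full GPU reset.
 */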
5698 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
5699                                          unsigned vmid)
5700 {
5701         struct amdgpu_device *adev = ring->adev;
5702         uint32_t value = 0;
5703
5704         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5705         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5706         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5707         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5708         WREG32_SOC15(GC, 0, regSQ_CMD, value);
5709 }
5710
5711 static void
5712 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5713                                       uint32_t me, uint32_t pipe,
5714                                       enum amdgpu_interrupt_state state)
5715 {
5716         uint32_t cp_int_cntl, cp_int_cntl_reg;
5717
5718         if (!me) {
5719                 switch (pipe) {
5720                 case 0:
5721                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
5722                         break;
5723                 case 1:
5724                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
5725                         break;
5726                 default:
5727                         DRM_DEBUG("invalid pipe %d\n", pipe);
5728                         return;
5729                 }
5730         } else {
5731                 DRM_DEBUG("invalid me %d\n", me);
5732                 return;
5733         }
5734
5735         switch (state) {
5736         case AMDGPU_IRQ_STATE_DISABLE:
5737                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5738                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5739                                             TIME_STAMP_INT_ENABLE, 0);
5740                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5741                                             GENERIC0_INT_ENABLE, 0);
5742                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5743                 break;
5744         case AMDGPU_IRQ_STATE_ENABLE:
5745                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5746                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5747                                             TIME_STAMP_INT_ENABLE, 1);
5748                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5749                                             GENERIC0_INT_ENABLE, 1);
5750                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5751                 break;
5752         default:
5753                 break;
5754         }
5755 }
5756
5757 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5758                                                      int me, int pipe,
5759                                                      enum amdgpu_interrupt_state state)
5760 {
5761         u32 mec_int_cntl, mec_int_cntl_reg;
5762
5763         /*
5764          * amdgpu controls only the first MEC. That's why this function only
5765          * handles the setting of interrupts for this specific MEC. All other
5766          * pipes' interrupts are set by amdkfd.
5767          */
5768
5769         if (me == 1) {
5770                 switch (pipe) {
5771                 case 0:
5772                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
5773                         break;
5774                 case 1:
5775                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
5776                         break;
5777                 case 2:
5778                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
5779                         break;
5780                 case 3:
5781                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
5782                         break;
5783                 default:
5784                         DRM_DEBUG("invalid pipe %d\n", pipe);
5785                         return;
5786                 }
5787         } else {
5788                 DRM_DEBUG("invalid me %d\n", me);
5789                 return;
5790         }
5791
5792         switch (state) {
5793         case AMDGPU_IRQ_STATE_DISABLE:
5794                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5795                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5796                                              TIME_STAMP_INT_ENABLE, 0);
5797                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5798                                              GENERIC0_INT_ENABLE, 0);
5799                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5800                 break;
5801         case AMDGPU_IRQ_STATE_ENABLE:
5802                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5803                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5804                                              TIME_STAMP_INT_ENABLE, 1);
5805                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5806                                              GENERIC0_INT_ENABLE, 1);
5807                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5808                 break;
5809         default:
5810                 break;
5811         }
5812 }
5813
5814 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5815                                             struct amdgpu_irq_src *src,
5816                                             unsigned type,
5817                                             enum amdgpu_interrupt_state state)
5818 {
5819         switch (type) {
5820         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5821                 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
5822                 break;
5823         case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
5824                 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
5825                 break;
5826         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5827                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5828                 break;
5829         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5830                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5831                 break;
5832         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5833                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5834                 break;
5835         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5836                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5837                 break;
5838         default:
5839                 break;
5840         }
5841         return 0;
5842 }
5843
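/*
 * EOP interrupt handler. For MES-managed queues, src_data[0] carries the
 * queue id (tagged with AMDGPU_FENCE_MES_QUEUE_FLAG); look it up in the MES
 * queue idr and process that ring's fences. Otherwise decode me/pipe/queue
 * from ring_id and process the matching gfx or compute ring.
 */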
5844 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
5845                              struct amdgpu_irq_src *source,
5846                              struct amdgpu_iv_entry *entry)
5847 {
5848         int i;
5849         u8 me_id, pipe_id, queue_id;
5850         struct amdgpu_ring *ring;
5851         uint32_t mes_queue_id = entry->src_data[0];
5852
5853         DRM_DEBUG("IH: CP EOP\n");
5854
5855         if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
5856                 struct amdgpu_mes_queue *queue;
5857
5858                 mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
5859
5860                 spin_lock(&adev->mes.queue_id_lock);
5861                 queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
5862                 if (queue) {
5863                         DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
5864                         amdgpu_fence_process(queue->ring);
5865                 }
5866                 spin_unlock(&adev->mes.queue_id_lock);
5867         } else {
5868                 me_id = (entry->ring_id & 0x0c) >> 2;
5869                 pipe_id = (entry->ring_id & 0x03) >> 0;
5870                 queue_id = (entry->ring_id & 0x70) >> 4;
5871
5872                 switch (me_id) {
5873                 case 0:
5874                         if (pipe_id == 0)
5875                                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5876                         else
5877                                 amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
5878                         break;
5879                 case 1:
5880                 case 2:
5881                         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5882                                 ring = &adev->gfx.compute_ring[i];
5883                                 /* Per-queue interrupt is supported for MEC starting from VI.
5884                                  * The interrupt can only be enabled/disabled per pipe instead
5885                                  * of per queue.
5886                                  */
5887                                 if ((ring->me == me_id) &&
5888                                     (ring->pipe == pipe_id) &&
5889                                     (ring->queue == queue_id))
5890                                         amdgpu_fence_process(ring);
5891                         }
5892                         break;
5893                 }
5894         }
5895
5896         return 0;
5897 }
5898
5899 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5900                                               struct amdgpu_irq_src *source,
5901                                               unsigned type,
5902                                               enum amdgpu_interrupt_state state)
5903 {
5904         switch (state) {
5905         case AMDGPU_IRQ_STATE_DISABLE:
5906         case AMDGPU_IRQ_STATE_ENABLE:
5907                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5908                                PRIV_REG_INT_ENABLE,
5909                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5910                 break;
5911         default:
5912                 break;
5913         }
5914
5915         return 0;
5916 }
5917
5918 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5919                                                struct amdgpu_irq_src *source,
5920                                                unsigned type,
5921                                                enum amdgpu_interrupt_state state)
5922 {
5923         switch (state) {
5924         case AMDGPU_IRQ_STATE_DISABLE:
5925         case AMDGPU_IRQ_STATE_ENABLE:
5926                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
5927                                PRIV_INSTR_INT_ENABLE,
5928                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5929                 break;
5930         default:
5931                 break;
5932         }
5933
5934         return 0;
5935 }
5936
5937 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
5938                                         struct amdgpu_iv_entry *entry)
5939 {
5940         u8 me_id, pipe_id, queue_id;
5941         struct amdgpu_ring *ring;
5942         int i;
5943
5944         me_id = (entry->ring_id & 0x0c) >> 2;
5945         pipe_id = (entry->ring_id & 0x03) >> 0;
5946         queue_id = (entry->ring_id & 0x70) >> 4;
5947
5948         switch (me_id) {
5949         case 0:
5950                 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
5951                         ring = &adev->gfx.gfx_ring[i];
5952                         /* we only enable 1 gfx queue per pipe for now */
5953                         if (ring->me == me_id && ring->pipe == pipe_id)
5954                                 drm_sched_fault(&ring->sched);
5955                 }
5956                 break;
5957         case 1:
5958         case 2:
5959                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5960                         ring = &adev->gfx.compute_ring[i];
5961                         if (ring->me == me_id && ring->pipe == pipe_id &&
5962                             ring->queue == queue_id)
5963                                 drm_sched_fault(&ring->sched);
5964                 }
5965                 break;
5966         default:
5967                 BUG();
5968                 break;
5969         }
5970 }
5971
5972 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
5973                                   struct amdgpu_irq_src *source,
5974                                   struct amdgpu_iv_entry *entry)
5975 {
5976         DRM_ERROR("Illegal register access in command stream\n");
5977         gfx_v11_0_handle_priv_fault(adev, entry);
5978         return 0;
5979 }
5980
5981 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
5982                                    struct amdgpu_irq_src *source,
5983                                    struct amdgpu_iv_entry *entry)
5984 {
5985         DRM_ERROR("Illegal instruction in command stream\n");
5986         gfx_v11_0_handle_priv_fault(adev, entry);
5987         return 0;
5988 }
5989
5990 #if 0
5991 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
5992                                              struct amdgpu_irq_src *src,
5993                                              unsigned int type,
5994                                              enum amdgpu_interrupt_state state)
5995 {
5996         uint32_t tmp, target;
5997         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
5998
5999         target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6000         target += ring->pipe;
6001
6002         switch (type) {
6003         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6004                 if (state == AMDGPU_IRQ_STATE_DISABLE) {
6005                         tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6006                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6007                                             GENERIC2_INT_ENABLE, 0);
6008                         WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6009
6010                         tmp = RREG32_SOC15_IP(GC, target);
6011                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6012                                             GENERIC2_INT_ENABLE, 0);
6013                         WREG32_SOC15_IP(GC, target, tmp);
6014                 } else {
6015                         tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6016                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6017                                             GENERIC2_INT_ENABLE, 1);
6018                         WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6019
6020                         tmp = RREG32_SOC15_IP(GC, target);
6021                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6022                                             GENERIC2_INT_ENABLE, 1);
6023                         WREG32_SOC15_IP(GC, target, tmp);
6024                 }
6025                 break;
6026         default:
6027                 BUG(); /* KIQ only supports GENERIC2_INT for now */
6028                 break;
6029         }
6030         return 0;
6031 }
6032 #endif
6033
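/*
 * Emit an ACQUIRE_MEM covering the full address range with a GCR request
 * that writes back and invalidates GL2/GLM and invalidates the
 * GL1/GLV/GLK/GLI caches, making prior writes visible to subsequent work
 * submitted on this ring.
 */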
6034 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6035 {
6036         const unsigned int gcr_cntl =
6037                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6038                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6039                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6040                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6041                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6042                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6043                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6044                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6045
6046         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6047         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6048         amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6049         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6050         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6051         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6052         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6053         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6054         amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6055 }
6056
6057 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
6058         .name = "gfx_v11_0",
6059         .early_init = gfx_v11_0_early_init,
6060         .late_init = gfx_v11_0_late_init,
6061         .sw_init = gfx_v11_0_sw_init,
6062         .sw_fini = gfx_v11_0_sw_fini,
6063         .hw_init = gfx_v11_0_hw_init,
6064         .hw_fini = gfx_v11_0_hw_fini,
6065         .suspend = gfx_v11_0_suspend,
6066         .resume = gfx_v11_0_resume,
6067         .is_idle = gfx_v11_0_is_idle,
6068         .wait_for_idle = gfx_v11_0_wait_for_idle,
6069         .soft_reset = gfx_v11_0_soft_reset,
6070         .check_soft_reset = gfx_v11_0_check_soft_reset,
6071         .set_clockgating_state = gfx_v11_0_set_clockgating_state,
6072         .set_powergating_state = gfx_v11_0_set_powergating_state,
6073         .get_clockgating_state = gfx_v11_0_get_clockgating_state,
6074 };
6075
6076 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
6077         .type = AMDGPU_RING_TYPE_GFX,
6078         .align_mask = 0xff,
6079         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6080         .support_64bit_ptrs = true,
6081         .vmhub = AMDGPU_GFXHUB_0,
6082         .get_rptr = gfx_v11_0_ring_get_rptr_gfx,
6083         .get_wptr = gfx_v11_0_ring_get_wptr_gfx,
6084         .set_wptr = gfx_v11_0_ring_set_wptr_gfx,
6085         .emit_frame_size = /* totally 242 maximum if 16 IBs */
6086                 5 + /* COND_EXEC */
6087                 7 + /* PIPELINE_SYNC */
6088                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6089                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6090                 2 + /* VM_FLUSH */
6091                 8 + /* FENCE for VM_FLUSH */
6092                 20 + /* GDS switch */
6093                 5 + /* COND_EXEC */
6094                 7 + /* HDP_flush */
6095                 4 + /* VGT_flush */
6096                 31 + /* DE_META */
6097                 3 + /* CNTX_CTRL */
6098                 5 + /* HDP_INVL */
6099                 8 + 8 + /* FENCE x2 */
6100                 8, /* gfx_v11_0_emit_mem_sync */
6101         .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
6102         .emit_ib = gfx_v11_0_ring_emit_ib_gfx,
6103         .emit_fence = gfx_v11_0_ring_emit_fence,
6104         .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6105         .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6106         .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6107         .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6108         .test_ring = gfx_v11_0_ring_test_ring,
6109         .test_ib = gfx_v11_0_ring_test_ib,
6110         .insert_nop = amdgpu_ring_insert_nop,
6111         .pad_ib = amdgpu_ring_generic_pad_ib,
6112         .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
6113         .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
6114         .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
6115         .preempt_ib = gfx_v11_0_ring_preempt_ib,
6116         .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
6117         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6118         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6119         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6120         .soft_recovery = gfx_v11_0_ring_soft_recovery,
6121         .emit_mem_sync = gfx_v11_0_emit_mem_sync,
6122 };
6123
6124 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
6125         .type = AMDGPU_RING_TYPE_COMPUTE,
6126         .align_mask = 0xff,
6127         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6128         .support_64bit_ptrs = true,
6129         .vmhub = AMDGPU_GFXHUB_0,
6130         .get_rptr = gfx_v11_0_ring_get_rptr_compute,
6131         .get_wptr = gfx_v11_0_ring_get_wptr_compute,
6132         .set_wptr = gfx_v11_0_ring_set_wptr_compute,
6133         .emit_frame_size =
6134                 20 + /* gfx_v11_0_ring_emit_gds_switch */
6135                 7 + /* gfx_v11_0_ring_emit_hdp_flush */
6136                 5 + /* hdp invalidate */
6137                 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6138                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6139                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6140                 2 + /* gfx_v11_0_ring_emit_vm_flush */
6141                 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
6142                 8, /* gfx_v11_0_emit_mem_sync */
6143         .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
6144         .emit_ib = gfx_v11_0_ring_emit_ib_compute,
6145         .emit_fence = gfx_v11_0_ring_emit_fence,
6146         .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6147         .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6148         .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6149         .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6150         .test_ring = gfx_v11_0_ring_test_ring,
6151         .test_ib = gfx_v11_0_ring_test_ib,
6152         .insert_nop = amdgpu_ring_insert_nop,
6153         .pad_ib = amdgpu_ring_generic_pad_ib,
6154         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6155         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6156         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6157         .emit_mem_sync = gfx_v11_0_emit_mem_sync,
6158 };
6159
6160 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
6161         .type = AMDGPU_RING_TYPE_KIQ,
6162         .align_mask = 0xff,
6163         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6164         .support_64bit_ptrs = true,
6165         .vmhub = AMDGPU_GFXHUB_0,
6166         .get_rptr = gfx_v11_0_ring_get_rptr_compute,
6167         .get_wptr = gfx_v11_0_ring_get_wptr_compute,
6168         .set_wptr = gfx_v11_0_ring_set_wptr_compute,
6169         .emit_frame_size =
6170                 20 + /* gfx_v11_0_ring_emit_gds_switch */
6171                 7 + /* gfx_v11_0_ring_emit_hdp_flush */
6172                 5 + /*hdp invalidate */
6173                 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6174                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6175                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6176                 2 + /* gfx_v11_0_ring_emit_vm_flush */
6177                 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6178         .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
6179         .emit_ib = gfx_v11_0_ring_emit_ib_compute,
6180         .emit_fence = gfx_v11_0_ring_emit_fence_kiq,
6181         .test_ring = gfx_v11_0_ring_test_ring,
6182         .test_ib = gfx_v11_0_ring_test_ib,
6183         .insert_nop = amdgpu_ring_insert_nop,
6184         .pad_ib = amdgpu_ring_generic_pad_ib,
6185         .emit_rreg = gfx_v11_0_ring_emit_rreg,
6186         .emit_wreg = gfx_v11_0_ring_emit_wreg,
6187         .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6188         .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6189 };
6190
6191 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
6192 {
6193         int i;
6194
6195         adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq;
6196
6197         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6198                 adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
6199
6200         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6201                 adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
6202 }
6203
6204 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
6205         .set = gfx_v11_0_set_eop_interrupt_state,
6206         .process = gfx_v11_0_eop_irq,
6207 };
6208
6209 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
6210         .set = gfx_v11_0_set_priv_reg_fault_state,
6211         .process = gfx_v11_0_priv_reg_irq,
6212 };
6213
6214 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
6215         .set = gfx_v11_0_set_priv_inst_fault_state,
6216         .process = gfx_v11_0_priv_inst_irq,
6217 };
6218
6219 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
6220 {
6221         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6222         adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
6223
6224         adev->gfx.priv_reg_irq.num_types = 1;
6225         adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
6226
6227         adev->gfx.priv_inst_irq.num_types = 1;
6228         adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
6229 }
6230
6231 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
6232 {
6233         if (adev->flags & AMD_IS_APU)
6234                 adev->gfx.imu.mode = MISSION_MODE;
6235         else
6236                 adev->gfx.imu.mode = DEBUG_MODE;
6237
6238         adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
6239 }
6240
6241 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
6242 {
6243         adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
6244 }
6245
6246 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
6247 {
6248         unsigned total_cu = adev->gfx.config.max_cu_per_sh *
6249                             adev->gfx.config.max_sh_per_se *
6250                             adev->gfx.config.max_shader_engines;
6251
6252         adev->gds.gds_size = 0x1000;
6253         adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
6254         adev->gds.gws_size = 64;
6255         adev->gds.oa_size = 16;
6256 }
6257
6258 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
6259 {
6260         /* set gfx eng mqd */
6261         adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
6262                 sizeof(struct v11_gfx_mqd);
6263         adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
6264                 gfx_v11_0_gfx_mqd_init;
6265         /* set compute eng mqd */
6266         adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
6267                 sizeof(struct v11_compute_mqd);
6268         adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
6269                 gfx_v11_0_compute_mqd_init;
6270 }
6271
6272 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
6273                                                           u32 bitmap)
6274 {
6275         u32 data;
6276
6277         if (!bitmap)
6278                 return;
6279
6280         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6281         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6282
6283         WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
6284 }
6285
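/*
 * Return the bitmap of active WGPs for the currently selected SE/SH: OR the
 * fused-off (CC) and user-disabled (GC_USER) inactive-WGP fields, invert
 * the result and mask it to the number of WGPs per SH (max_cu_per_sh / 2).
 */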
6286 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
6287 {
6288         u32 data, wgp_bitmask;
6289         data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
6290         data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
6291
6292         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6293         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6294
6295         wgp_bitmask =
6296                 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
6297
6298         return (~data) & wgp_bitmask;
6299 }
6300
6301 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
6302 {
6303         u32 wgp_idx, wgp_active_bitmap;
6304         u32 cu_bitmap_per_wgp, cu_active_bitmap;
6305
6306         wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
6307         cu_active_bitmap = 0;
6308
6309         for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
6310                 /* each enabled WGP provides 2 enabled CUs */
6311                 cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
6312                 if (wgp_active_bitmap & (1 << wgp_idx))
6313                         cu_active_bitmap |= cu_bitmap_per_wgp;
6314         }
6315
6316         return cu_active_bitmap;
6317 }
6318
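/*
 * Walk every SE/SH, apply the parsed CU-disable masks as user WGP disables,
 * and accumulate the per-SH active CU bitmaps and total active CU count
 * into @cu_info, which is reported through the drm_amdgpu_info_device
 * ioctl (see the bitmap layout note in the loop below).
 */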
6319 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
6320                                  struct amdgpu_cu_info *cu_info)
6321 {
6322         int i, j, k, counter, active_cu_number = 0;
6323         u32 mask, bitmap;
6324         unsigned disable_masks[8 * 2];
6325
6326         if (!adev || !cu_info)
6327                 return -EINVAL;
6328
6329         amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
6330
6331         mutex_lock(&adev->grbm_idx_mutex);
6332         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6333                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6334                         mask = 1;
6335                         counter = 0;
6336                         gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
6337                         if (i < 8 && j < 2)
6338                                 gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
6339                                         adev, disable_masks[i * 2 + j]);
6340                         bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
6341
6342                         /*
6343                          * GFX11 can support more than 4 SEs, while the bitmap
6344                          * in the cu_info struct is 4x4 and the ioctl interface
6345                          * struct drm_amdgpu_info_device must stay stable.
6346                          * So we use the last two columns of the bitmap to store the CU mask for
6347                          * SEs 4 to 7, the layout of the bitmap is as below:
6348                          *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
6349                          *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
6350                          *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
6351                          *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
6352                          *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
6353                          *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
6354                          *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
6355                          *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
6356                          */
6357                         cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
6358
6359                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6360                                 if (bitmap & mask)
6361                                         counter++;
6362
6363                                 mask <<= 1;
6364                         }
6365                         active_cu_number += counter;
6366                 }
6367         }
6368         gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6369         mutex_unlock(&adev->grbm_idx_mutex);
6370
6371         cu_info->number = active_cu_number;
6372         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6373
6374         return 0;
6375 }
6376
6377 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
6378 {
6379         .type = AMD_IP_BLOCK_TYPE_GFX,
6380         .major = 11,
6381         .minor = 0,
6382         .rev = 0,
6383         .funcs = &gfx_v11_0_ip_funcs,
6384 };