drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24
25 #include <linux/ratelimit.h>
26 #include <linux/printk.h>
27 #include <linux/slab.h>
28 #include <linux/list.h>
29 #include <linux/types.h>
30 #include <linux/bitops.h>
31 #include <linux/sched.h>
32 #include "kfd_priv.h"
33 #include "kfd_device_queue_manager.h"
34 #include "kfd_mqd_manager.h"
35 #include "cik_regs.h"
36 #include "kfd_kernel_queue.h"
37 #include "amdgpu_amdkfd.h"
38 #include "mes_api_def.h"
39 #include "kfd_debug.h"
40
41 /* Size of the per-pipe EOP queue */
42 #define CIK_HPD_EOP_BYTES_LOG2 11
43 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
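/* With CIK_HPD_EOP_BYTES_LOG2 == 11, each pipe gets 1 << 11 == 2048 bytes of EOP buffer. */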
44
45 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
46                                   u32 pasid, unsigned int vmid);
47
48 static int execute_queues_cpsch(struct device_queue_manager *dqm,
49                                 enum kfd_unmap_queues_filter filter,
50                                 uint32_t filter_param,
51                                 uint32_t grace_period);
52 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
53                                 enum kfd_unmap_queues_filter filter,
54                                 uint32_t filter_param,
55                                 uint32_t grace_period,
56                                 bool reset);
57
58 static int map_queues_cpsch(struct device_queue_manager *dqm);
59
60 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
61                                 struct queue *q);
62
63 static inline void deallocate_hqd(struct device_queue_manager *dqm,
64                                 struct queue *q);
65 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
66 static int allocate_sdma_queue(struct device_queue_manager *dqm,
67                                 struct queue *q, const uint32_t *restore_sdma_id);
68 static void kfd_process_hw_exception(struct work_struct *work);
69
70 static inline
71 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
72 {
73         if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
74                 return KFD_MQD_TYPE_SDMA;
75         return KFD_MQD_TYPE_CP;
76 }
77
78 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
79 {
80         int i;
81         int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec
82                 + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe;
83
84         /* queue is available for KFD usage if bit is 1 */
85         for (i = 0; i <  dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i)
86                 if (test_bit(pipe_offset + i,
87                               dqm->dev->kfd->shared_resources.cp_queue_bitmap))
88                         return true;
89         return false;
90 }
91
92 unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
93 {
94         return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap,
95                                 KGD_MAX_QUEUES);
96 }
97
98 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
99 {
100         return dqm->dev->kfd->shared_resources.num_queue_per_pipe;
101 }
102
103 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
104 {
105         return dqm->dev->kfd->shared_resources.num_pipe_per_mec;
106 }
107
108 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
109 {
110         return kfd_get_num_sdma_engines(dqm->dev) +
111                 kfd_get_num_xgmi_sdma_engines(dqm->dev);
112 }
113
114 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
115 {
116         return kfd_get_num_sdma_engines(dqm->dev) *
117                 dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
118 }
119
120 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
121 {
122         return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
123                 dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
124 }
125
126 static void init_sdma_bitmaps(struct device_queue_manager *dqm)
127 {
128         bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES);
129         bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm));
130
131         bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
132         bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm));
133
134         /* Mask out the reserved queues */
135         bitmap_andnot(dqm->sdma_bitmap, dqm->sdma_bitmap,
136                       dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap,
137                       KFD_MAX_SDMA_QUEUES);
138 }
139
140 void program_sh_mem_settings(struct device_queue_manager *dqm,
141                                         struct qcm_process_device *qpd)
142 {
143         uint32_t xcc_mask = dqm->dev->xcc_mask;
144         int xcc_id;
145
146         for_each_inst(xcc_id, xcc_mask)
147                 dqm->dev->kfd2kgd->program_sh_mem_settings(
148                         dqm->dev->adev, qpd->vmid, qpd->sh_mem_config,
149                         qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit,
150                         qpd->sh_mem_bases, xcc_id);
151 }
152
153 static void kfd_hws_hang(struct device_queue_manager *dqm)
154 {
155         /*
156          * Issue a GPU reset if HWS is unresponsive
157          */
158         dqm->is_hws_hang = true;
159
160         /* It's possible we're detecting a HWS hang in the
161          * middle of a GPU reset. No need to schedule another
162          * reset in this case.
163          */
164         if (!dqm->is_resetting)
165                 schedule_work(&dqm->hw_exception_work);
166 }
167
168 static int convert_to_mes_queue_type(int queue_type)
169 {
170         int mes_queue_type;
171
172         switch (queue_type) {
173         case KFD_QUEUE_TYPE_COMPUTE:
174                 mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
175                 break;
176         case KFD_QUEUE_TYPE_SDMA:
177                 mes_queue_type = MES_QUEUE_TYPE_SDMA;
178                 break;
179         default:
180                 WARN(1, "Invalid queue type %d", queue_type);
181                 mes_queue_type = -EINVAL;
182                 break;
183         }
184
185         return mes_queue_type;
186 }
187
188 static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
189                          struct qcm_process_device *qpd)
190 {
191         struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
192         struct kfd_process_device *pdd = qpd_to_pdd(qpd);
193         struct mes_add_queue_input queue_input;
194         int r, queue_type;
195         uint64_t wptr_addr_off;
196
197         if (dqm->is_hws_hang)
198                 return -EIO;
199
200         memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
201         queue_input.process_id = qpd->pqm->process->pasid;
202         queue_input.page_table_base_addr =  qpd->page_table_base;
203         queue_input.process_va_start = 0;
204         queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
205         /* MES unit for quantum is 100ns */
206         queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM;  /* Equivalent to 10ms. */
207         queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
208         queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
209         queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
210         queue_input.inprocess_gang_priority = q->properties.priority;
211         queue_input.gang_global_priority_level =
212                                         AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
213         queue_input.doorbell_offset = q->properties.doorbell_off;
214         queue_input.mqd_addr = q->gart_mqd_addr;
215         queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
216
217         if (q->wptr_bo) {
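                /*
                 * wptr_mc_addr: MC (GPU-visible) address of the write pointer,
                 * i.e. the wptr BO's start page converted to bytes plus the
                 * write pointer's offset within that page.
                 */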
218                 wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
219                 queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
220         }
221
222         queue_input.is_kfd_process = 1;
223         queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
224         queue_input.queue_size = q->properties.queue_size >> 2;
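        /* Queue size is passed right-shifted by 2, i.e. presumably in dwords rather than bytes. */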
225
226         queue_input.paging = false;
227         queue_input.tba_addr = qpd->tba_addr;
228         queue_input.tma_addr = qpd->tma_addr;
229         queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
230         queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;
231
232         queue_type = convert_to_mes_queue_type(q->properties.type);
233         if (queue_type < 0) {
234                 pr_err("Queue type not supported with MES, queue:%d\n",
235                                 q->properties.type);
236                 return -EINVAL;
237         }
238         queue_input.queue_type = (uint32_t)queue_type;
239
240         if (q->gws) {
241                 queue_input.gws_base = 0;
242                 queue_input.gws_size = qpd->num_gws;
243         }
244
245         amdgpu_mes_lock(&adev->mes);
246         r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
247         amdgpu_mes_unlock(&adev->mes);
248         if (r) {
249                 pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
250                         q->properties.doorbell_off);
251                 pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
252                 kfd_hws_hang(dqm);
253         }
254
255         return r;
256 }
257
258 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
259                         struct qcm_process_device *qpd)
260 {
261         struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
262         int r;
263         struct mes_remove_queue_input queue_input;
264
265         if (dqm->is_hws_hang)
266                 return -EIO;
267
268         memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
269         queue_input.doorbell_offset = q->properties.doorbell_off;
270         queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
271
272         amdgpu_mes_lock(&adev->mes);
273         r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
274         amdgpu_mes_unlock(&adev->mes);
275
276         if (r) {
277                 pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
278                         q->properties.doorbell_off);
279                 pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
280                 kfd_hws_hang(dqm);
281         }
282
283         return r;
284 }
285
286 static int remove_all_queues_mes(struct device_queue_manager *dqm)
287 {
288         struct device_process_node *cur;
289         struct qcm_process_device *qpd;
290         struct queue *q;
291         int retval = 0;
292
293         list_for_each_entry(cur, &dqm->queues, list) {
294                 qpd = cur->qpd;
295                 list_for_each_entry(q, &qpd->queues_list, list) {
296                         if (q->properties.is_active) {
297                                 retval = remove_queue_mes(dqm, q, qpd);
298                                 if (retval) {
299                                         pr_err("%s: Failed to remove queue %d for dev %d",
300                                                 __func__,
301                                                 q->properties.queue_id,
302                                                 dqm->dev->id);
303                                         return retval;
304                                 }
305                         }
306                 }
307         }
308
309         return retval;
310 }
311
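/* Bookkeeping for the scheduler: track the number of active queues overall,
 * active CP queues and GWS-using queues; e.g. map_queues_cpsch consults
 * dqm->active_queue_count to decide whether a new runlist must be uploaded.
 */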
312 static void increment_queue_count(struct device_queue_manager *dqm,
313                                   struct qcm_process_device *qpd,
314                                   struct queue *q)
315 {
316         dqm->active_queue_count++;
317         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
318             q->properties.type == KFD_QUEUE_TYPE_DIQ)
319                 dqm->active_cp_queue_count++;
320
321         if (q->properties.is_gws) {
322                 dqm->gws_queue_count++;
323                 qpd->mapped_gws_queue = true;
324         }
325 }
326
327 static void decrement_queue_count(struct device_queue_manager *dqm,
328                                   struct qcm_process_device *qpd,
329                                   struct queue *q)
330 {
331         dqm->active_queue_count--;
332         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
333             q->properties.type == KFD_QUEUE_TYPE_DIQ)
334                 dqm->active_cp_queue_count--;
335
336         if (q->properties.is_gws) {
337                 dqm->gws_queue_count--;
338                 qpd->mapped_gws_queue = false;
339         }
340 }
341
342 /*
343  * Allocate a doorbell ID to this queue.
344  * If restore_id is passed in, make sure the requested ID is valid, then allocate it.
345  */
346 static int allocate_doorbell(struct qcm_process_device *qpd,
347                              struct queue *q,
348                              uint32_t const *restore_id)
349 {
350         struct kfd_node *dev = qpd->dqm->dev;
351
352         if (!KFD_IS_SOC15(dev)) {
353                 /* On pre-SOC15 chips we need to use the queue ID to
354                  * preserve the user mode ABI.
355                  */
356
357                 if (restore_id && *restore_id != q->properties.queue_id)
358                         return -EINVAL;
359
360                 q->doorbell_id = q->properties.queue_id;
361         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
362                         q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
363                 /* For SDMA queues on SOC15 with 8-byte doorbell, use static
364                  * doorbell assignments based on the engine and queue id.
365                  * The doorbell index distance between RLC (2*i) and (2*i+1)
366                  * for an SDMA engine is 512.
367                  */
368
369                 uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx;
370
371                 /*
372                  * q->properties.sdma_engine_id corresponds to the virtual
373                  * sdma engine number. However, for doorbell allocation,
374                  * we need the physical sdma engine id in order to get the
375                  * correct doorbell offset.
376                  */
377                 uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id *
378                                                get_num_all_sdma_engines(qpd->dqm) +
379                                                q->properties.sdma_engine_id]
380                                                 + (q->properties.sdma_queue_id & 1)
381                                                 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
382                                                 + (q->properties.sdma_queue_id >> 1);
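                /*
                 * Illustrative example (assuming KFD_QUEUE_DOORBELL_MIRROR_OFFSET
                 * matches the 512-doorbell distance mentioned above): for
                 * sdma_queue_id == 3 this adds (3 & 1) * 512 + (3 >> 1) == 513
                 * to the engine's doorbell base from sdma_doorbell_idx.
                 */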
383
384                 if (restore_id && *restore_id != valid_id)
385                         return -EINVAL;
386                 q->doorbell_id = valid_id;
387         } else {
388                 /* For CP queues on SOC15 */
389                 if (restore_id) {
390                         /* make sure that ID is free */
391                         if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
392                                 return -EINVAL;
393
394                         q->doorbell_id = *restore_id;
395                 } else {
396                         /* or reserve a free doorbell ID */
397                         unsigned int found;
398
399                         found = find_first_zero_bit(qpd->doorbell_bitmap,
400                                                 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
401                         if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
402                                 pr_debug("No doorbells available");
403                                 return -EBUSY;
404                         }
405                         set_bit(found, qpd->doorbell_bitmap);
406                         q->doorbell_id = found;
407                 }
408         }
409
410         q->properties.doorbell_off =
411                 kfd_get_doorbell_dw_offset_in_bar(dev->kfd, qpd_to_pdd(qpd),
412                                           q->doorbell_id);
413         return 0;
414 }
415
416 static void deallocate_doorbell(struct qcm_process_device *qpd,
417                                 struct queue *q)
418 {
419         unsigned int old;
420         struct kfd_node *dev = qpd->dqm->dev;
421
422         if (!KFD_IS_SOC15(dev) ||
423             q->properties.type == KFD_QUEUE_TYPE_SDMA ||
424             q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
425                 return;
426
427         old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
428         WARN_ON(!old);
429 }
430
431 static void program_trap_handler_settings(struct device_queue_manager *dqm,
432                                 struct qcm_process_device *qpd)
433 {
434         uint32_t xcc_mask = dqm->dev->xcc_mask;
435         int xcc_id;
436
437         if (dqm->dev->kfd2kgd->program_trap_handler_settings)
438                 for_each_inst(xcc_id, xcc_mask)
439                         dqm->dev->kfd2kgd->program_trap_handler_settings(
440                                 dqm->dev->adev, qpd->vmid, qpd->tba_addr,
441                                 qpd->tma_addr, xcc_id);
442 }
443
444 static int allocate_vmid(struct device_queue_manager *dqm,
445                         struct qcm_process_device *qpd,
446                         struct queue *q)
447 {
448         int allocated_vmid = -1, i;
449
450         for (i = dqm->dev->vm_info.first_vmid_kfd;
451                         i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
452                 if (!dqm->vmid_pasid[i]) {
453                         allocated_vmid = i;
454                         break;
455                 }
456         }
457
458         if (allocated_vmid < 0) {
459                 pr_err("no more vmid to allocate\n");
460                 return -ENOSPC;
461         }
462
463         pr_debug("vmid allocated: %d\n", allocated_vmid);
464
465         dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
466
467         set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
468
469         qpd->vmid = allocated_vmid;
470         q->properties.vmid = allocated_vmid;
471
472         program_sh_mem_settings(dqm, qpd);
473
474         if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled)
475                 program_trap_handler_settings(dqm, qpd);
476
477         /* qpd->page_table_base is set earlier when register_process()
478          * is called, i.e. when the first queue is created.
479          */
480         dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
481                         qpd->vmid,
482                         qpd->page_table_base);
483         /* invalidate the VM context after pasid and vmid mapping is set up */
484         kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
485
486         if (dqm->dev->kfd2kgd->set_scratch_backing_va)
487                 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
488                                 qpd->sh_hidden_private_base, qpd->vmid);
489
490         return 0;
491 }
492
493 static int flush_texture_cache_nocpsch(struct kfd_node *kdev,
494                                 struct qcm_process_device *qpd)
495 {
496         const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
497         int ret;
498
499         if (!qpd->ib_kaddr)
500                 return -ENOMEM;
501
502         ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
503         if (ret)
504                 return ret;
505
506         return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
507                                 qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
508                                 pmf->release_mem_size / sizeof(uint32_t));
509 }
510
511 static void deallocate_vmid(struct device_queue_manager *dqm,
512                                 struct qcm_process_device *qpd,
513                                 struct queue *q)
514 {
515         /* On GFX v7, CP doesn't flush TC at dequeue */
516         if (q->device->adev->asic_type == CHIP_HAWAII)
517                 if (flush_texture_cache_nocpsch(q->device, qpd))
518                         pr_err("Failed to flush TC\n");
519
520         kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
521
522         /* Release the vmid mapping */
523         set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
524         dqm->vmid_pasid[qpd->vmid] = 0;
525
526         qpd->vmid = 0;
527         q->properties.vmid = 0;
528 }
529
530 static int create_queue_nocpsch(struct device_queue_manager *dqm,
531                                 struct queue *q,
532                                 struct qcm_process_device *qpd,
533                                 const struct kfd_criu_queue_priv_data *qd,
534                                 const void *restore_mqd, const void *restore_ctl_stack)
535 {
536         struct mqd_manager *mqd_mgr;
537         int retval;
538
539         dqm_lock(dqm);
540
541         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
542                 pr_warn("Can't create new usermode queue because %d queues were already created\n",
543                                 dqm->total_queue_count);
544                 retval = -EPERM;
545                 goto out_unlock;
546         }
547
548         if (list_empty(&qpd->queues_list)) {
549                 retval = allocate_vmid(dqm, qpd, q);
550                 if (retval)
551                         goto out_unlock;
552         }
553         q->properties.vmid = qpd->vmid;
554         /*
555          * Eviction state logic: mark all queues as evicted, even ones
556          * not currently active. Restoring inactive queues later only
557          * updates the is_evicted flag but is a no-op otherwise.
558          */
559         q->properties.is_evicted = !!qpd->evicted;
560
561         q->properties.tba_addr = qpd->tba_addr;
562         q->properties.tma_addr = qpd->tma_addr;
563
564         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
565                         q->properties.type)];
566         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
567                 retval = allocate_hqd(dqm, q);
568                 if (retval)
569                         goto deallocate_vmid;
570                 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
571                         q->pipe, q->queue);
572         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
573                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
574                 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
575                 if (retval)
576                         goto deallocate_vmid;
577                 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
578         }
579
580         retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
581         if (retval)
582                 goto out_deallocate_hqd;
583
584         /* Temporarily release dqm lock to avoid a circular lock dependency */
585         dqm_unlock(dqm);
586         q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
587         dqm_lock(dqm);
588
589         if (!q->mqd_mem_obj) {
590                 retval = -ENOMEM;
591                 goto out_deallocate_doorbell;
592         }
593
594         if (qd)
595                 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
596                                      &q->properties, restore_mqd, restore_ctl_stack,
597                                      qd->ctl_stack_size);
598         else
599                 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
600                                         &q->gart_mqd_addr, &q->properties);
601
602         if (q->properties.is_active) {
603                 if (!dqm->sched_running) {
604                         WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
605                         goto add_queue_to_list;
606                 }
607
608                 if (WARN(q->process->mm != current->mm,
609                                         "should only run in user thread"))
610                         retval = -EFAULT;
611                 else
612                         retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
613                                         q->queue, &q->properties, current->mm);
614                 if (retval)
615                         goto out_free_mqd;
616         }
617
618 add_queue_to_list:
619         list_add(&q->list, &qpd->queues_list);
620         qpd->queue_count++;
621         if (q->properties.is_active)
622                 increment_queue_count(dqm, qpd, q);
623
624         /*
625          * Unconditionally increment this counter, regardless of the queue's
626          * type or whether the queue is active.
627          */
628         dqm->total_queue_count++;
629         pr_debug("Total of %d queues are accountable so far\n",
630                         dqm->total_queue_count);
631         goto out_unlock;
632
633 out_free_mqd:
634         mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
635 out_deallocate_doorbell:
636         deallocate_doorbell(qpd, q);
637 out_deallocate_hqd:
638         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
639                 deallocate_hqd(dqm, q);
640         else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
641                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
642                 deallocate_sdma_queue(dqm, q);
643 deallocate_vmid:
644         if (list_empty(&qpd->queues_list))
645                 deallocate_vmid(dqm, qpd, q);
646 out_unlock:
647         dqm_unlock(dqm);
648         return retval;
649 }
650
651 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
652 {
653         bool set;
654         int pipe, bit, i;
655
656         set = false;
657
658         for (pipe = dqm->next_pipe_to_allocate, i = 0;
659                         i < get_pipes_per_mec(dqm);
660                         pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
661
662                 if (!is_pipe_enabled(dqm, 0, pipe))
663                         continue;
664
665                 if (dqm->allocated_queues[pipe] != 0) {
666                         bit = ffs(dqm->allocated_queues[pipe]) - 1;
667                         dqm->allocated_queues[pipe] &= ~(1 << bit);
668                         q->pipe = pipe;
669                         q->queue = bit;
670                         set = true;
671                         break;
672                 }
673         }
674
675         if (!set)
676                 return -EBUSY;
677
678         pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
679         /* horizontal hqd allocation: advance so queues are spread across pipes */
680         dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
681
682         return 0;
683 }
684
685 static inline void deallocate_hqd(struct device_queue_manager *dqm,
686                                 struct queue *q)
687 {
688         dqm->allocated_queues[q->pipe] |= (1 << q->queue);
689 }
690
691 #define SQ_IND_CMD_CMD_KILL             0x00000003
692 #define SQ_IND_CMD_MODE_BROADCAST       0x00000001
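/* SQ_CMD encoding used by dbgdev_wave_reset_wavefronts(): broadcast a KILL to
 * every wavefront belonging to a given VMID.
 */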
693
694 static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p)
695 {
696         int status = 0;
697         unsigned int vmid;
698         uint16_t queried_pasid;
699         union SQ_CMD_BITS reg_sq_cmd;
700         union GRBM_GFX_INDEX_BITS reg_gfx_index;
701         struct kfd_process_device *pdd;
702         int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
703         int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
704         uint32_t xcc_mask = dev->xcc_mask;
705         int xcc_id;
706
707         reg_sq_cmd.u32All = 0;
708         reg_gfx_index.u32All = 0;
709
710         pr_debug("Killing all process wavefronts\n");
711
712         if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
713                 pr_err("no vmid pasid mapping supported\n");
714                 return -EOPNOTSUPP;
715         }
716
717         /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
718          * ATC_VMID15_PASID_MAPPING
719          * to check which VMID the current process is mapped to.
720          */
721
722         for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
723                 status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
724                                 (dev->adev, vmid, &queried_pasid);
725
726                 if (status && queried_pasid == p->pasid) {
727                         pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
728                                         vmid, p->pasid);
729                         break;
730                 }
731         }
732
733         if (vmid > last_vmid_to_scan) {
734                 pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
735                 return -EFAULT;
736         }
737
738         /* take the VMID for that process the safe way, via the PDD */
739         pdd = kfd_get_process_device_data(dev, p);
740         if (!pdd)
741                 return -EFAULT;
742
743         reg_gfx_index.bits.sh_broadcast_writes = 1;
744         reg_gfx_index.bits.se_broadcast_writes = 1;
745         reg_gfx_index.bits.instance_broadcast_writes = 1;
746         reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
747         reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
748         reg_sq_cmd.bits.vm_id = vmid;
749
750         for_each_inst(xcc_id, xcc_mask)
751                 dev->kfd2kgd->wave_control_execute(
752                         dev->adev, reg_gfx_index.u32All,
753                         reg_sq_cmd.u32All, xcc_id);
754
755         return 0;
756 }
757
758 /* The DQM lock has to be held before calling destroy_queue_nocpsch_locked
759  * to avoid unsynchronized access
760  */
761 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
762                                 struct qcm_process_device *qpd,
763                                 struct queue *q)
764 {
765         int retval;
766         struct mqd_manager *mqd_mgr;
767
768         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
769                         q->properties.type)];
770
771         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
772                 deallocate_hqd(dqm, q);
773         else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
774                 deallocate_sdma_queue(dqm, q);
775         else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
776                 deallocate_sdma_queue(dqm, q);
777         else {
778                 pr_debug("q->properties.type %d is invalid\n",
779                                 q->properties.type);
780                 return -EINVAL;
781         }
782         dqm->total_queue_count--;
783
784         deallocate_doorbell(qpd, q);
785
786         if (!dqm->sched_running) {
787                 WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
788                 return 0;
789         }
790
791         retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
792                                 KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
793                                 KFD_UNMAP_LATENCY_MS,
794                                 q->pipe, q->queue);
795         if (retval == -ETIME)
796                 qpd->reset_wavefronts = true;
797
798         list_del(&q->list);
799         if (list_empty(&qpd->queues_list)) {
800                 if (qpd->reset_wavefronts) {
801                         pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
802                                         dqm->dev);
803                         /* dbgdev_wave_reset_wavefronts has to be called before
804                          * deallocate_vmid(), i.e. when vmid is still in use.
805                          */
806                         dbgdev_wave_reset_wavefronts(dqm->dev,
807                                         qpd->pqm->process);
808                         qpd->reset_wavefronts = false;
809                 }
810
811                 deallocate_vmid(dqm, qpd, q);
812         }
813         qpd->queue_count--;
814         if (q->properties.is_active)
815                 decrement_queue_count(dqm, qpd, q);
816
817         return retval;
818 }
819
820 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
821                                 struct qcm_process_device *qpd,
822                                 struct queue *q)
823 {
824         int retval;
825         uint64_t sdma_val = 0;
826         struct kfd_process_device *pdd = qpd_to_pdd(qpd);
827         struct mqd_manager *mqd_mgr =
828                 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
829
830         /* Get the SDMA queue stats */
831         if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
832             (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
833                 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
834                                                         &sdma_val);
835                 if (retval)
836                         pr_err("Failed to read SDMA queue counter for queue: %d\n",
837                                 q->properties.queue_id);
838         }
839
840         dqm_lock(dqm);
841         retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
842         if (!retval)
843                 pdd->sdma_past_activity_counter += sdma_val;
844         dqm_unlock(dqm);
845
846         mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
847
848         return retval;
849 }
850
851 static int update_queue(struct device_queue_manager *dqm, struct queue *q,
852                         struct mqd_update_info *minfo)
853 {
854         int retval = 0;
855         struct mqd_manager *mqd_mgr;
856         struct kfd_process_device *pdd;
857         bool prev_active = false;
858
859         dqm_lock(dqm);
860         pdd = kfd_get_process_device_data(q->device, q->process);
861         if (!pdd) {
862                 retval = -ENODEV;
863                 goto out_unlock;
864         }
865         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
866                         q->properties.type)];
867
868         /* Save previous activity state for counters */
869         prev_active = q->properties.is_active;
870
871         /* Make sure the queue is unmapped before updating the MQD */
872         if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
873                 if (!dqm->dev->kfd->shared_resources.enable_mes)
874                         retval = unmap_queues_cpsch(dqm,
875                                                     KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
876                 else if (prev_active)
877                         retval = remove_queue_mes(dqm, q, &pdd->qpd);
878
879                 if (retval) {
880                         pr_err("unmap queue failed\n");
881                         goto out_unlock;
882                 }
883         } else if (prev_active &&
884                    (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
885                     q->properties.type == KFD_QUEUE_TYPE_SDMA ||
886                     q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
887
888                 if (!dqm->sched_running) {
889                         WARN_ONCE(1, "Update non-HWS queue while stopped\n");
890                         goto out_unlock;
891                 }
892
893                 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
894                                 (dqm->dev->kfd->cwsr_enabled ?
895                                  KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
896                                  KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
897                                 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
898                 if (retval) {
899                         pr_err("destroy mqd failed\n");
900                         goto out_unlock;
901                 }
902         }
903
904         mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);
905
906         /*
907          * Check the active state vs. the previous state and modify the
908          * counter accordingly. map_queues_cpsch uses the
909          * dqm->active_queue_count to determine whether a new runlist must be
910          * uploaded.
911          */
912         if (q->properties.is_active && !prev_active) {
913                 increment_queue_count(dqm, &pdd->qpd, q);
914         } else if (!q->properties.is_active && prev_active) {
915                 decrement_queue_count(dqm, &pdd->qpd, q);
916         } else if (q->gws && !q->properties.is_gws) {
917                 if (q->properties.is_active) {
918                         dqm->gws_queue_count++;
919                         pdd->qpd.mapped_gws_queue = true;
920                 }
921                 q->properties.is_gws = true;
922         } else if (!q->gws && q->properties.is_gws) {
923                 if (q->properties.is_active) {
924                         dqm->gws_queue_count--;
925                         pdd->qpd.mapped_gws_queue = false;
926                 }
927                 q->properties.is_gws = false;
928         }
929
930         if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
931                 if (!dqm->dev->kfd->shared_resources.enable_mes)
932                         retval = map_queues_cpsch(dqm);
933                 else if (q->properties.is_active)
934                         retval = add_queue_mes(dqm, q, &pdd->qpd);
935         } else if (q->properties.is_active &&
936                  (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
937                   q->properties.type == KFD_QUEUE_TYPE_SDMA ||
938                   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
939                 if (WARN(q->process->mm != current->mm,
940                          "should only run in user thread"))
941                         retval = -EFAULT;
942                 else
943                         retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
944                                                    q->pipe, q->queue,
945                                                    &q->properties, current->mm);
946         }
947
948 out_unlock:
949         dqm_unlock(dqm);
950         return retval;
951 }
952
953 /* Unlike evict_process_queues_cpsch or evict_process_queues_nocpsch,
954  * suspend_single_queue does not take the dqm lock. The caller must lock
955  * the dqm before calling and unlock it afterwards.
956  *
957  * We don't take the lock here because this function may be called on
958  * multiple queues in a loop; rather than locking and unlocking each
959  * time, the caller keeps the dqm locked across all of the calls.
960  */
961 static int suspend_single_queue(struct device_queue_manager *dqm,
962                                       struct kfd_process_device *pdd,
963                                       struct queue *q)
964 {
965         bool is_new;
966
967         if (q->properties.is_suspended)
968                 return 0;
969
970         pr_debug("Suspending PASID %u queue [%i]\n",
971                         pdd->process->pasid,
972                         q->properties.queue_id);
973
974         is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW);
975
976         if (is_new || q->properties.is_being_destroyed) {
977                 pr_debug("Suspend: skip %s queue id %i\n",
978                                 is_new ? "new" : "destroyed",
979                                 q->properties.queue_id);
980                 return -EBUSY;
981         }
982
983         q->properties.is_suspended = true;
984         if (q->properties.is_active) {
985                 if (dqm->dev->kfd->shared_resources.enable_mes) {
986                         int r = remove_queue_mes(dqm, q, &pdd->qpd);
987
988                         if (r)
989                                 return r;
990                 }
991
992                 decrement_queue_count(dqm, &pdd->qpd, q);
993                 q->properties.is_active = false;
994         }
995
996         return 0;
997 }
998
999 /* Unlike restore_process_queues_cpsch or restore_process_queues_nocpsch,
1000  * resume_single_queue does not take the dqm lock. The caller must lock
1001  * the dqm before calling and unlock it afterwards.
1002  *
1003  * We don't take the lock here because this function may be called on
1004  * multiple queues in a loop; rather than locking and unlocking each
1005  * time, the caller keeps the dqm locked across all of the calls.
1006  */
1007 static int resume_single_queue(struct device_queue_manager *dqm,
1008                                       struct qcm_process_device *qpd,
1009                                       struct queue *q)
1010 {
1011         struct kfd_process_device *pdd;
1012
1013         if (!q->properties.is_suspended)
1014                 return 0;
1015
1016         pdd = qpd_to_pdd(qpd);
1017
1018         pr_debug("Restoring from suspend PASID %u queue [%i]\n",
1019                             pdd->process->pasid,
1020                             q->properties.queue_id);
1021
1022         q->properties.is_suspended = false;
1023
1024         if (QUEUE_IS_ACTIVE(q->properties)) {
1025                 if (dqm->dev->kfd->shared_resources.enable_mes) {
1026                         int r = add_queue_mes(dqm, q, &pdd->qpd);
1027
1028                         if (r)
1029                                 return r;
1030                 }
1031
1032                 q->properties.is_active = true;
1033                 increment_queue_count(dqm, qpd, q);
1034         }
1035
1036         return 0;
1037 }
1038
1039 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
1040                                         struct qcm_process_device *qpd)
1041 {
1042         struct queue *q;
1043         struct mqd_manager *mqd_mgr;
1044         struct kfd_process_device *pdd;
1045         int retval, ret = 0;
1046
1047         dqm_lock(dqm);
1048         if (qpd->evicted++ > 0) /* already evicted, do nothing */
1049                 goto out;
1050
1051         pdd = qpd_to_pdd(qpd);
1052         pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
1053                             pdd->process->pasid);
1054
1055         pdd->last_evict_timestamp = get_jiffies_64();
1056         /* Mark all queues as evicted. Deactivate all active queues on
1057          * the qpd.
1058          */
1059         list_for_each_entry(q, &qpd->queues_list, list) {
1060                 q->properties.is_evicted = true;
1061                 if (!q->properties.is_active)
1062                         continue;
1063
1064                 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1065                                 q->properties.type)];
1066                 q->properties.is_active = false;
1067                 decrement_queue_count(dqm, qpd, q);
1068
1069                 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
1070                         continue;
1071
1072                 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
1073                                 (dqm->dev->kfd->cwsr_enabled ?
1074                                  KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
1075                                  KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
1076                                 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
1077                 if (retval && !ret)
1078                         /* Return the first error, but keep going to
1079                          * maintain a consistent eviction state
1080                          */
1081                         ret = retval;
1082         }
1083
1084 out:
1085         dqm_unlock(dqm);
1086         return ret;
1087 }
1088
1089 static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
1090                                       struct qcm_process_device *qpd)
1091 {
1092         struct queue *q;
1093         struct kfd_process_device *pdd;
1094         int retval = 0;
1095
1096         dqm_lock(dqm);
1097         if (qpd->evicted++ > 0) /* already evicted, do nothing */
1098                 goto out;
1099
1100         pdd = qpd_to_pdd(qpd);
1101
1102         /* The debugger creates processes that temporarily have not acquired
1103          * all VMs for all devices and have no VMs themselves.
1104          * Skip queue eviction on process eviction.
1105          */
1106         if (!pdd->drm_priv)
1107                 goto out;
1108
1109         pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
1110                             pdd->process->pasid);
1111
1112         /* Mark all queues as evicted. Deactivate all active queues on
1113          * the qpd.
1114          */
1115         list_for_each_entry(q, &qpd->queues_list, list) {
1116                 q->properties.is_evicted = true;
1117                 if (!q->properties.is_active)
1118                         continue;
1119
1120                 q->properties.is_active = false;
1121                 decrement_queue_count(dqm, qpd, q);
1122
1123                 if (dqm->dev->kfd->shared_resources.enable_mes) {
1124                         retval = remove_queue_mes(dqm, q, qpd);
1125                         if (retval) {
1126                                 pr_err("Failed to evict queue %d\n",
1127                                         q->properties.queue_id);
1128                                 goto out;
1129                         }
1130                 }
1131         }
1132         pdd->last_evict_timestamp = get_jiffies_64();
1133         if (!dqm->dev->kfd->shared_resources.enable_mes)
1134                 retval = execute_queues_cpsch(dqm,
1135                                               qpd->is_debug ?
1136                                               KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
1137                                               KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
1138                                               USE_DEFAULT_GRACE_PERIOD);
1139
1140 out:
1141         dqm_unlock(dqm);
1142         return retval;
1143 }
1144
1145 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
1146                                           struct qcm_process_device *qpd)
1147 {
1148         struct mm_struct *mm = NULL;
1149         struct queue *q;
1150         struct mqd_manager *mqd_mgr;
1151         struct kfd_process_device *pdd;
1152         uint64_t pd_base;
1153         uint64_t eviction_duration;
1154         int retval, ret = 0;
1155
1156         pdd = qpd_to_pdd(qpd);
1157         /* Retrieve PD base */
1158         pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1159
1160         dqm_lock(dqm);
1161         if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
1162                 goto out;
1163         if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
1164                 qpd->evicted--;
1165                 goto out;
1166         }
1167
1168         pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
1169                             pdd->process->pasid);
1170
1171         /* Update PD Base in QPD */
1172         qpd->page_table_base = pd_base;
1173         pr_debug("Updated PD address to 0x%llx\n", pd_base);
1174
1175         if (!list_empty(&qpd->queues_list)) {
1176                 dqm->dev->kfd2kgd->set_vm_context_page_table_base(
1177                                 dqm->dev->adev,
1178                                 qpd->vmid,
1179                                 qpd->page_table_base);
1180                 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
1181         }
1182
1183         /* Take a safe reference to the mm_struct, which may otherwise
1184          * disappear even while the kfd_process is still referenced.
1185          */
1186         mm = get_task_mm(pdd->process->lead_thread);
1187         if (!mm) {
1188                 ret = -EFAULT;
1189                 goto out;
1190         }
1191
1192         /* Remove the eviction flags. Activate queues that are not
1193          * inactive for other reasons.
1194          */
1195         list_for_each_entry(q, &qpd->queues_list, list) {
1196                 q->properties.is_evicted = false;
1197                 if (!QUEUE_IS_ACTIVE(q->properties))
1198                         continue;
1199
1200                 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1201                                 q->properties.type)];
1202                 q->properties.is_active = true;
1203                 increment_queue_count(dqm, qpd, q);
1204
1205                 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
1206                         continue;
1207
1208                 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
1209                                        q->queue, &q->properties, mm);
1210                 if (retval && !ret)
1211                         /* Return the first error, but keep going to
1212                          * maintain a consistent eviction state
1213                          */
1214                         ret = retval;
1215         }
1216         qpd->evicted = 0;
1217         eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
1218         atomic64_add(eviction_duration, &pdd->evict_duration_counter);
1219 out:
1220         if (mm)
1221                 mmput(mm);
1222         dqm_unlock(dqm);
1223         return ret;
1224 }
1225
1226 static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
1227                                         struct qcm_process_device *qpd)
1228 {
1229         struct queue *q;
1230         struct kfd_process_device *pdd;
1231         uint64_t eviction_duration;
1232         int retval = 0;
1233
1234         pdd = qpd_to_pdd(qpd);
1235
1236         dqm_lock(dqm);
1237         if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
1238                 goto out;
1239         if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
1240                 qpd->evicted--;
1241                 goto out;
1242         }
1243
1244         /* The debugger creates processes that temporarily have not acquired
1245          * all VMs for all devices and have no VMs themselves.
1246          * Skip queue restore on process restore.
1247          */
1248         if (!pdd->drm_priv)
1249                 goto vm_not_acquired;
1250
1251         pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
1252                             pdd->process->pasid);
1253
1254         /* Update PD Base in QPD */
1255         qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1256         pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base);
1257
1258         /* activate all active queues on the qpd */
1259         list_for_each_entry(q, &qpd->queues_list, list) {
1260                 q->properties.is_evicted = false;
1261                 if (!QUEUE_IS_ACTIVE(q->properties))
1262                         continue;
1263
1264                 q->properties.is_active = true;
1265                 increment_queue_count(dqm, &pdd->qpd, q);
1266
1267                 if (dqm->dev->kfd->shared_resources.enable_mes) {
1268                         retval = add_queue_mes(dqm, q, qpd);
1269                         if (retval) {
1270                                 pr_err("Failed to restore queue %d\n",
1271                                         q->properties.queue_id);
1272                                 goto out;
1273                         }
1274                 }
1275         }
1276         if (!dqm->dev->kfd->shared_resources.enable_mes)
1277                 retval = execute_queues_cpsch(dqm,
1278                                               KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
1279         eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
1280         atomic64_add(eviction_duration, &pdd->evict_duration_counter);
1281 vm_not_acquired:
1282         qpd->evicted = 0;
1283 out:
1284         dqm_unlock(dqm);
1285         return retval;
1286 }
1287
1288 static int register_process(struct device_queue_manager *dqm,
1289                                         struct qcm_process_device *qpd)
1290 {
1291         struct device_process_node *n;
1292         struct kfd_process_device *pdd;
1293         uint64_t pd_base;
1294         int retval;
1295
1296         n = kzalloc(sizeof(*n), GFP_KERNEL);
1297         if (!n)
1298                 return -ENOMEM;
1299
1300         n->qpd = qpd;
1301
1302         pdd = qpd_to_pdd(qpd);
1303         /* Retrieve PD base */
1304         pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1305
1306         dqm_lock(dqm);
1307         list_add(&n->list, &dqm->queues);
1308
1309         /* Update PD Base in QPD */
1310         qpd->page_table_base = pd_base;
1311         pr_debug("Updated PD address to 0x%llx\n", pd_base);
1312
1313         retval = dqm->asic_ops.update_qpd(dqm, qpd);
1314
1315         dqm->processes_count++;
1316
1317         dqm_unlock(dqm);
1318
1319         /* Outside the DQM lock because under the DQM lock we can't do
1320          * reclaim or take other locks that others hold while reclaiming.
1321          */
1322         kfd_inc_compute_active(dqm->dev);
1323
1324         return retval;
1325 }
1326
1327 static int unregister_process(struct device_queue_manager *dqm,
1328                                         struct qcm_process_device *qpd)
1329 {
1330         int retval;
1331         struct device_process_node *cur, *next;
1332
1333         pr_debug("qpd->queues_list is %s\n",
1334                         list_empty(&qpd->queues_list) ? "empty" : "not empty");
1335
1336         retval = 0;
1337         dqm_lock(dqm);
1338
1339         list_for_each_entry_safe(cur, next, &dqm->queues, list) {
1340                 if (qpd == cur->qpd) {
1341                         list_del(&cur->list);
1342                         kfree(cur);
1343                         dqm->processes_count--;
1344                         goto out;
1345                 }
1346         }
1347         /* qpd not found in dqm list */
1348         retval = 1;
1349 out:
1350         dqm_unlock(dqm);
1351
1352         /* Outside the DQM lock because under the DQM lock we can't do
1353          * reclaim or take other locks that others hold while reclaiming.
1354          */
1355         if (!retval)
1356                 kfd_dec_compute_active(dqm->dev);
1357
1358         return retval;
1359 }
1360
1361 static int
1362 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
1363                         unsigned int vmid)
1364 {
1365         uint32_t xcc_mask = dqm->dev->xcc_mask;
1366         int xcc_id, ret;
1367
1368         for_each_inst(xcc_id, xcc_mask) {
1369                 ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
1370                         dqm->dev->adev, pasid, vmid, xcc_id);
1371                 if (ret)
1372                         break;
1373         }
1374
1375         return ret;
1376 }
1377
1378 static void init_interrupts(struct device_queue_manager *dqm)
1379 {
1380         uint32_t xcc_mask = dqm->dev->xcc_mask;
1381         unsigned int i, xcc_id;
1382
1383         for_each_inst(xcc_id, xcc_mask) {
1384                 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) {
1385                         if (is_pipe_enabled(dqm, 0, i)) {
1386                                 dqm->dev->kfd2kgd->init_interrupts(
1387                                         dqm->dev->adev, i, xcc_id);
1388                         }
1389                 }
1390         }
1391 }
1392
1393 static int initialize_nocpsch(struct device_queue_manager *dqm)
1394 {
1395         int pipe, queue;
1396
1397         pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1398
1399         dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
1400                                         sizeof(unsigned int), GFP_KERNEL);
1401         if (!dqm->allocated_queues)
1402                 return -ENOMEM;
1403
1404         mutex_init(&dqm->lock_hidden);
1405         INIT_LIST_HEAD(&dqm->queues);
1406         dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
1407         dqm->active_cp_queue_count = 0;
1408         dqm->gws_queue_count = 0;
1409
1410         for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
1411                 int pipe_offset = pipe * get_queues_per_pipe(dqm);
1412
1413                 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
1414                         if (test_bit(pipe_offset + queue,
1415                                      dqm->dev->kfd->shared_resources.cp_queue_bitmap))
1416                                 dqm->allocated_queues[pipe] |= 1 << queue;
1417         }
1418
1419         memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
1420
1421         init_sdma_bitmaps(dqm);
1422
1423         return 0;
1424 }
1425
1426 static void uninitialize(struct device_queue_manager *dqm)
1427 {
1428         int i;
1429
1430         WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
1431
1432         kfree(dqm->allocated_queues);
1433         for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
1434                 kfree(dqm->mqd_mgrs[i]);
1435         mutex_destroy(&dqm->lock_hidden);
1436 }
1437
1438 static int start_nocpsch(struct device_queue_manager *dqm)
1439 {
1440         int r = 0;
1441
1442         pr_info("SW scheduler is used\n");
1443         init_interrupts(dqm);
1444
1445         if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1446                 r = pm_init(&dqm->packet_mgr, dqm);
1447         if (!r)
1448                 dqm->sched_running = true;
1449
1450         return r;
1451 }
1452
1453 static int stop_nocpsch(struct device_queue_manager *dqm)
1454 {
1455         dqm_lock(dqm);
1456         if (!dqm->sched_running) {
1457                 dqm_unlock(dqm);
1458                 return 0;
1459         }
1460
1461         if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1462                 pm_uninit(&dqm->packet_mgr, false);
1463         dqm->sched_running = false;
1464         dqm_unlock(dqm);
1465
1466         return 0;
1467 }
1468
1469 static void pre_reset(struct device_queue_manager *dqm)
1470 {
1471         dqm_lock(dqm);
1472         dqm->is_resetting = true;
1473         dqm_unlock(dqm);
1474 }
1475
1476 static int allocate_sdma_queue(struct device_queue_manager *dqm,
1477                                 struct queue *q, const uint32_t *restore_sdma_id)
1478 {
1479         int bit;
1480
1481         if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1482                 if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
1483                         pr_err("No more SDMA queue to allocate\n");
1484                         return -ENOMEM;
1485                 }
1486
1487                 if (restore_sdma_id) {
1488                         /* Re-use existing sdma_id */
1489                         if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
1490                                 pr_err("SDMA queue already in use\n");
1491                                 return -EBUSY;
1492                         }
1493                         clear_bit(*restore_sdma_id, dqm->sdma_bitmap);
1494                         q->sdma_id = *restore_sdma_id;
1495                 } else {
1496                         /* Find first available sdma_id */
1497                         bit = find_first_bit(dqm->sdma_bitmap,
1498                                              get_num_sdma_queues(dqm));
1499                         clear_bit(bit, dqm->sdma_bitmap);
1500                         q->sdma_id = bit;
1501                 }
1502
1503                 q->properties.sdma_engine_id =
1504                         q->sdma_id % kfd_get_num_sdma_engines(dqm->dev);
1505                 q->properties.sdma_queue_id = q->sdma_id /
1506                                 kfd_get_num_sdma_engines(dqm->dev);
1507         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1508                 if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
1509                         pr_err("No more XGMI SDMA queue to allocate\n");
1510                         return -ENOMEM;
1511                 }
1512                 if (restore_sdma_id) {
1513                         /* Re-use existing sdma_id */
1514                         if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
1515                                 pr_err("SDMA queue already in use\n");
1516                                 return -EBUSY;
1517                         }
1518                         clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
1519                         q->sdma_id = *restore_sdma_id;
1520                 } else {
1521                         bit = find_first_bit(dqm->xgmi_sdma_bitmap,
1522                                              get_num_xgmi_sdma_queues(dqm));
1523                         clear_bit(bit, dqm->xgmi_sdma_bitmap);
1524                         q->sdma_id = bit;
1525                 }
1526                 /* sdma_engine_id is sdma id including
1527                  * both PCIe-optimized SDMAs and XGMI-
1528                  * optimized SDMAs. The calculation below
1529                  * assumes the first N engines are always
1530                  * PCIe-optimized ones
1531                  */
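                      /* Illustrative example (engine counts assumed, not
                       * taken from this file): with 2 PCIe-optimized and
                       * 6 XGMI-optimized engines, an XGMI sdma_id of 7
                       * maps to sdma_engine_id = 2 + 7 % 6 = 3 and
                       * sdma_queue_id = 7 / 6 = 1.
                       */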
1532                 q->properties.sdma_engine_id =
1533                         kfd_get_num_sdma_engines(dqm->dev) +
1534                         q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
1535                 q->properties.sdma_queue_id = q->sdma_id /
1536                         kfd_get_num_xgmi_sdma_engines(dqm->dev);
1537         }
1538
1539         pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
1540         pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
1541
1542         return 0;
1543 }
1544
1545 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
1546                                 struct queue *q)
1547 {
1548         if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1549                 if (q->sdma_id >= get_num_sdma_queues(dqm))
1550                         return;
1551                 set_bit(q->sdma_id, dqm->sdma_bitmap);
1552         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1553                 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
1554                         return;
1555                 set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap);
1556         }
1557 }
1558
1559 /*
1560  * Device Queue Manager implementation for cp scheduler
1561  */
1562
1563 static int set_sched_resources(struct device_queue_manager *dqm)
1564 {
1565         int i, mec;
1566         struct scheduling_resources res;
1567
1568         res.vmid_mask = dqm->dev->compute_vmid_bitmap;
1569
1570         res.queue_mask = 0;
1571         for (i = 0; i < KGD_MAX_QUEUES; ++i) {
1572                 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
1573                         / dqm->dev->kfd->shared_resources.num_pipe_per_mec;
1574
1575                 if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
1576                         continue;
1577
1578                 /* only acquire queues from the first MEC */
1579                 if (mec > 0)
1580                         continue;
1581
1582                 /* This situation may be hit in the future if a new HW
1583                  * generation exposes more than 64 queues. If so, the
1584                  * definition of res.queue_mask needs updating
1585                  */
1586                 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
1587                         pr_err("Invalid queue enabled by amdgpu: %d\n", i);
1588                         break;
1589                 }
1590
1591                 res.queue_mask |= 1ull
1592                         << amdgpu_queue_mask_bit_to_set_resource_bit(
1593                                 dqm->dev->adev, i);
1594         }
1595         res.gws_mask = ~0ull;
1596         res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
1597
1598         pr_debug("Scheduling resources:\n"
1599                         "vmid mask: 0x%8X\n"
1600                         "queue mask: 0x%8llX\n",
1601                         res.vmid_mask, res.queue_mask);
1602
1603         return pm_send_set_resources(&dqm->packet_mgr, &res);
1604 }
1605
1606 static int initialize_cpsch(struct device_queue_manager *dqm)
1607 {
1608         pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1609
1610         mutex_init(&dqm->lock_hidden);
1611         INIT_LIST_HEAD(&dqm->queues);
1612         dqm->active_queue_count = dqm->processes_count = 0;
1613         dqm->active_cp_queue_count = 0;
1614         dqm->gws_queue_count = 0;
1615         dqm->active_runlist = false;
1616         INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1617         dqm->trap_debug_vmid = 0;
1618
1619         init_sdma_bitmaps(dqm);
1620
1621         if (dqm->dev->kfd2kgd->get_iq_wait_times)
1622                 dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
1623                                         &dqm->wait_times);
1624         return 0;
1625 }
1626
1627 static int start_cpsch(struct device_queue_manager *dqm)
1628 {
1629         int retval;
1630
1631         retval = 0;
1632
1633         dqm_lock(dqm);
1634
1635         if (!dqm->dev->kfd->shared_resources.enable_mes) {
1636                 retval = pm_init(&dqm->packet_mgr, dqm);
1637                 if (retval)
1638                         goto fail_packet_manager_init;
1639
1640                 retval = set_sched_resources(dqm);
1641                 if (retval)
1642                         goto fail_set_sched_resources;
1643         }
1644         pr_debug("Allocating fence memory\n");
1645
1646         /* allocate fence memory on the gart */
1647         retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1648                                         &dqm->fence_mem);
1649
1650         if (retval)
1651                 goto fail_allocate_vidmem;
1652
1653         dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
1654         dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
1655
1656         init_interrupts(dqm);
1657
1658         /* clear hang status when the driver tries to start the hw scheduler */
1659         dqm->is_hws_hang = false;
1660         dqm->is_resetting = false;
1661         dqm->sched_running = true;
1662
1663         if (!dqm->dev->kfd->shared_resources.enable_mes)
1664                 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
1665         dqm_unlock(dqm);
1666
1667         return 0;
1668 fail_allocate_vidmem:
1669 fail_set_sched_resources:
1670         if (!dqm->dev->kfd->shared_resources.enable_mes)
1671                 pm_uninit(&dqm->packet_mgr, false);
1672 fail_packet_manager_init:
1673         dqm_unlock(dqm);
1674         return retval;
1675 }
1676
1677 static int stop_cpsch(struct device_queue_manager *dqm)
1678 {
1679         bool hanging;
1680
1681         dqm_lock(dqm);
1682         if (!dqm->sched_running) {
1683                 dqm_unlock(dqm);
1684                 return 0;
1685         }
1686
1687         if (!dqm->is_hws_hang) {
1688                 if (!dqm->dev->kfd->shared_resources.enable_mes)
1689                         unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
1690                 else
1691                         remove_all_queues_mes(dqm);
1692         }
1693
1694         hanging = dqm->is_hws_hang || dqm->is_resetting;
1695         dqm->sched_running = false;
1696
1697         if (!dqm->dev->kfd->shared_resources.enable_mes)
1698                 pm_release_ib(&dqm->packet_mgr);
1699
1700         kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1701         if (!dqm->dev->kfd->shared_resources.enable_mes)
1702                 pm_uninit(&dqm->packet_mgr, hanging);
1703         dqm_unlock(dqm);
1704
1705         return 0;
1706 }
1707
1708 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1709                                         struct kernel_queue *kq,
1710                                         struct qcm_process_device *qpd)
1711 {
1712         dqm_lock(dqm);
1713         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1714                 pr_warn("Can't create new kernel queue because %d queues were already created\n",
1715                                 dqm->total_queue_count);
1716                 dqm_unlock(dqm);
1717                 return -EPERM;
1718         }
1719
1720         /*
1721          * Unconditionally increment this counter, regardless of the queue's
1722          * type or whether the queue is active.
1723          */
1724         dqm->total_queue_count++;
1725         pr_debug("Total of %d queues are accountable so far\n",
1726                         dqm->total_queue_count);
1727
1728         list_add(&kq->list, &qpd->priv_queue_list);
1729         increment_queue_count(dqm, qpd, kq->queue);
1730         qpd->is_debug = true;
1731         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
1732                         USE_DEFAULT_GRACE_PERIOD);
1733         dqm_unlock(dqm);
1734
1735         return 0;
1736 }
1737
1738 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1739                                         struct kernel_queue *kq,
1740                                         struct qcm_process_device *qpd)
1741 {
1742         dqm_lock(dqm);
1743         list_del(&kq->list);
1744         decrement_queue_count(dqm, qpd, kq->queue);
1745         qpd->is_debug = false;
1746         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
1747                         USE_DEFAULT_GRACE_PERIOD);
1748         /*
1749          * Unconditionally decrement this counter, regardless of the queue's
1750          * type.
1751          */
1752         dqm->total_queue_count--;
1753         pr_debug("Total of %d queues are accountable so far\n",
1754                         dqm->total_queue_count);
1755         dqm_unlock(dqm);
1756 }
1757
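     /*
      * Creates a user-mode queue on the CP-scheduler path: allocate an
      * SDMA queue id and a doorbell if needed, build the MQD (or restore
      * it from CRIU data when qd is provided), attach the queue to the
      * qpd, and, if it is active, hand it to the scheduler through a
      * runlist or an MES add-queue call.
      */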
1758 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1759                         struct qcm_process_device *qpd,
1760                         const struct kfd_criu_queue_priv_data *qd,
1761                         const void *restore_mqd, const void *restore_ctl_stack)
1762 {
1763         int retval;
1764         struct mqd_manager *mqd_mgr;
1765
1766         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1767                 pr_warn("Can't create new usermode queue because %d queues were already created\n",
1768                                 dqm->total_queue_count);
1769                 retval = -EPERM;
1770                 goto out;
1771         }
1772
1773         if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1774                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1775                 dqm_lock(dqm);
1776                 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
1777                 dqm_unlock(dqm);
1778                 if (retval)
1779                         goto out;
1780         }
1781
1782         retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
1783         if (retval)
1784                 goto out_deallocate_sdma_queue;
1785
1786         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1787                         q->properties.type)];
1788
1789         if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1790                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1791                 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1792         q->properties.tba_addr = qpd->tba_addr;
1793         q->properties.tma_addr = qpd->tma_addr;
1794         q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
1795         if (!q->mqd_mem_obj) {
1796                 retval = -ENOMEM;
1797                 goto out_deallocate_doorbell;
1798         }
1799
1800         dqm_lock(dqm);
1801         /*
1802          * Eviction state logic: mark all queues as evicted, even ones
1803          * not currently active. Restoring inactive queues later only
1804          * updates the is_evicted flag but is a no-op otherwise.
1805          */
1806         q->properties.is_evicted = !!qpd->evicted;
1807         q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled &&
1808                                   kfd_dbg_has_cwsr_workaround(q->device);
1809
1810         if (qd)
1811                 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
1812                                      &q->properties, restore_mqd, restore_ctl_stack,
1813                                      qd->ctl_stack_size);
1814         else
1815                 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
1816                                         &q->gart_mqd_addr, &q->properties);
1817
1818         list_add(&q->list, &qpd->queues_list);
1819         qpd->queue_count++;
1820
1821         if (q->properties.is_active) {
1822                 increment_queue_count(dqm, qpd, q);
1823
1824                 if (!dqm->dev->kfd->shared_resources.enable_mes)
1825                         retval = execute_queues_cpsch(dqm,
1826                                         KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
1827                 else
1828                         retval = add_queue_mes(dqm, q, qpd);
1829                 if (retval)
1830                         goto cleanup_queue;
1831         }
1832
1833         /*
1834          * Unconditionally increment this counter, regardless of the queue's
1835          * type or whether the queue is active.
1836          */
1837         dqm->total_queue_count++;
1838
1839         pr_debug("Total of %d queues are accountable so far\n",
1840                         dqm->total_queue_count);
1841
1842         dqm_unlock(dqm);
1843         return retval;
1844
1845 cleanup_queue:
1846         qpd->queue_count--;
1847         list_del(&q->list);
1848         if (q->properties.is_active)
1849                 decrement_queue_count(dqm, qpd, q);
1850         mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1851         dqm_unlock(dqm);
1852 out_deallocate_doorbell:
1853         deallocate_doorbell(qpd, q);
1854 out_deallocate_sdma_queue:
1855         if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1856                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1857                 dqm_lock(dqm);
1858                 deallocate_sdma_queue(dqm, q);
1859                 dqm_unlock(dqm);
1860         }
1861 out:
1862         return retval;
1863 }
1864
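     /*
      * Polls *fence_addr until it reaches fence_value, yielding between
      * reads. Returns -ETIME once timeout_ms expires; if halt_if_hws_hang
      * is set it keeps scheduling instead of returning, so CP state is
      * preserved for firmware debugging.
      */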
1865 int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
1866                                 uint64_t fence_value,
1867                                 unsigned int timeout_ms)
1868 {
1869         unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
1870
1871         while (*fence_addr != fence_value) {
1872                 if (time_after(jiffies, end_jiffies)) {
1873                         pr_err("qcm fence wait loop timeout expired\n");
1874                         /* In HWS case, this is used to halt the driver thread
1875                          * in order not to mess up CP states before doing
1876                          * scandumps for FW debugging.
1877                          */
1878                         while (halt_if_hws_hang)
1879                                 schedule();
1880
1881                         return -ETIME;
1882                 }
1883                 schedule();
1884         }
1885
1886         return 0;
1887 }
1888
1889 /* dqm->lock mutex has to be locked before calling this function */
1890 static int map_queues_cpsch(struct device_queue_manager *dqm)
1891 {
1892         int retval;
1893
1894         if (!dqm->sched_running)
1895                 return 0;
1896         if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
1897                 return 0;
1898         if (dqm->active_runlist)
1899                 return 0;
1900
1901         retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
1902         pr_debug("%s sent runlist\n", __func__);
1903         if (retval) {
1904                 pr_err("failed to execute runlist\n");
1905                 return retval;
1906         }
1907         dqm->active_runlist = true;
1908
1909         return retval;
1910 }
1911
1912 /* dqm->lock mutex has to be locked before calling this function */
1913 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1914                                 enum kfd_unmap_queues_filter filter,
1915                                 uint32_t filter_param,
1916                                 uint32_t grace_period,
1917                                 bool reset)
1918 {
1919         int retval = 0;
1920         struct mqd_manager *mqd_mgr;
1921
1922         if (!dqm->sched_running)
1923                 return 0;
1924         if (dqm->is_hws_hang || dqm->is_resetting)
1925                 return -EIO;
1926         if (!dqm->active_runlist)
1927                 return retval;
1928
1929         if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
1930                 retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
1931                 if (retval)
1932                         return retval;
1933         }
1934
1935         retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
1936         if (retval)
1937                 return retval;
1938
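              /* Fence handshake with the scheduler: seed the fence with
               * KFD_FENCE_INIT, ask the HIQ to write KFD_FENCE_COMPLETED once
               * the unmap has been processed, then wait (bounded by
               * queue_preemption_timeout_ms) for that write to land.
               */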
1939         *dqm->fence_addr = KFD_FENCE_INIT;
1940         pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
1941                                 KFD_FENCE_COMPLETED);
1942         /* a timeout here means the queue preemption did not complete */
1943         retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1944                                 queue_preemption_timeout_ms);
1945         if (retval) {
1946                 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1947                 kfd_hws_hang(dqm);
1948                 return retval;
1949         }
1950
1951         /* In the current MEC firmware implementation, if a compute queue
1952          * doesn't respond to the preemption request in time, the HIQ will
1953          * abandon the unmap request without returning any timeout error
1954          * to the driver. Instead, MEC firmware logs the doorbell of the
1955          * unresponsive compute queue in the HIQ.MQD.queue_doorbell_id fields.
1956          * To make sure the queue unmap was successful, the driver needs to
1957          * check those fields.
1958          */
1959         mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
1960         if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
1961                 pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
1962                 while (halt_if_hws_hang)
1963                         schedule();
1964                 return -ETIME;
1965         }
1966
1967         /* We need to reset the grace period value for this device */
1968         if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
1969                 if (pm_update_grace_period(&dqm->packet_mgr,
1970                                         USE_DEFAULT_GRACE_PERIOD))
1971                         pr_err("Failed to reset grace period\n");
1972         }
1973
1974         pm_release_ib(&dqm->packet_mgr);
1975         dqm->active_runlist = false;
1976
1977         return retval;
1978 }
1979
1980 /* only for compute queue */
1981 static int reset_queues_cpsch(struct device_queue_manager *dqm,
1982                         uint16_t pasid)
1983 {
1984         int retval;
1985
1986         dqm_lock(dqm);
1987
1988         retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
1989                         pasid, USE_DEFAULT_GRACE_PERIOD, true);
1990
1991         dqm_unlock(dqm);
1992         return retval;
1993 }
1994
1995 /* dqm->lock mutex has to be locked before calling this function */
1996 static int execute_queues_cpsch(struct device_queue_manager *dqm,
1997                                 enum kfd_unmap_queues_filter filter,
1998                                 uint32_t filter_param,
1999                                 uint32_t grace_period)
2000 {
2001         int retval;
2002
2003         if (dqm->is_hws_hang)
2004                 return -EIO;
2005         retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false);
2006         if (retval)
2007                 return retval;
2008
2009         return map_queues_cpsch(dqm);
2010 }
2011
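     /*
      * Marks the queue as being destroyed. If the debug trap is enabled
      * and the queue is currently suspended, the DQM and process locks
      * are dropped while waiting for the queue to be resumed; qpds with
      * is_debug set return immediately.
      */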
2012 static int wait_on_destroy_queue(struct device_queue_manager *dqm,
2013                                  struct queue *q)
2014 {
2015         struct kfd_process_device *pdd = kfd_get_process_device_data(q->device,
2016                                                                 q->process);
2017         int ret = 0;
2018
2019         if (pdd->qpd.is_debug)
2020                 return ret;
2021
2022         q->properties.is_being_destroyed = true;
2023
2024         if (pdd->process->debug_trap_enabled && q->properties.is_suspended) {
2025                 dqm_unlock(dqm);
2026                 mutex_unlock(&q->process->mutex);
2027                 ret = wait_event_interruptible(dqm->destroy_wait,
2028                                                 !q->properties.is_suspended);
2029
2030                 mutex_lock(&q->process->mutex);
2031                 dqm_lock(dqm);
2032         }
2033
2034         return ret;
2035 }
2036
2037 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
2038                                 struct qcm_process_device *qpd,
2039                                 struct queue *q)
2040 {
2041         int retval;
2042         struct mqd_manager *mqd_mgr;
2043         uint64_t sdma_val = 0;
2044         struct kfd_process_device *pdd = qpd_to_pdd(qpd);
2045
2046         /* Get the SDMA queue stats */
2047         if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
2048             (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
2049                 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
2050                                                         &sdma_val);
2051                 if (retval)
2052                         pr_err("Failed to read SDMA queue counter for queue: %d\n",
2053                                 q->properties.queue_id);
2054         }
2055
2056         /* remove queue from list to prevent rescheduling after preemption */
2057         dqm_lock(dqm);
2058
2059         retval = wait_on_destroy_queue(dqm, q);
2060
2061         if (retval) {
2062                 dqm_unlock(dqm);
2063                 return retval;
2064         }
2065
2066         if (qpd->is_debug) {
2067                 /*
2068                  * error: we do not currently allow destroying a queue
2069                  * of a process that is being debugged
2070                  */
2071                 retval = -EBUSY;
2072                 goto failed_try_destroy_debugged_queue;
2073
2074         }
2075
2076         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2077                         q->properties.type)];
2078
2079         deallocate_doorbell(qpd, q);
2080
2081         if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
2082             (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
2083                 deallocate_sdma_queue(dqm, q);
2084                 pdd->sdma_past_activity_counter += sdma_val;
2085         }
2086
2087         list_del(&q->list);
2088         qpd->queue_count--;
2089         if (q->properties.is_active) {
2090                 decrement_queue_count(dqm, qpd, q);
2091                 if (!dqm->dev->kfd->shared_resources.enable_mes) {
2092                         retval = execute_queues_cpsch(dqm,
2093                                                       KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
2094                                                       USE_DEFAULT_GRACE_PERIOD);
2095                         if (retval == -ETIME)
2096                                 qpd->reset_wavefronts = true;
2097                 } else {
2098                         retval = remove_queue_mes(dqm, q, qpd);
2099                 }
2100         }
2101
2102         /*
2103          * Unconditionally decrement this counter, regardless of the queue's
2104          * type
2105          */
2106         dqm->total_queue_count--;
2107         pr_debug("Total of %d queues are accountable so far\n",
2108                         dqm->total_queue_count);
2109
2110         dqm_unlock(dqm);
2111
2112         /*
2113          * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid
2114          * circular locking
2115          */
2116         kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE),
2117                                 qpd->pqm->process, q->device,
2118                                 -1, false, NULL, 0);
2119
2120         mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
2121
2122         return retval;
2123
2124 failed_try_destroy_debugged_queue:
2125
2126         dqm_unlock(dqm);
2127         return retval;
2128 }
2129
2130 /*
2131  * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
2132  * stay in user mode.
2133  */
2134 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
2135 /* APE1 limit is inclusive and 64K aligned. */
2136 #define APE1_LIMIT_ALIGNMENT 0xFFFF
2137
2138 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
2139                                    struct qcm_process_device *qpd,
2140                                    enum cache_policy default_policy,
2141                                    enum cache_policy alternate_policy,
2142                                    void __user *alternate_aperture_base,
2143                                    uint64_t alternate_aperture_size)
2144 {
2145         bool retval = true;
2146
2147         if (!dqm->asic_ops.set_cache_memory_policy)
2148                 return retval;
2149
2150         dqm_lock(dqm);
2151
2152         if (alternate_aperture_size == 0) {
2153                 /* base > limit disables APE1 */
2154                 qpd->sh_mem_ape1_base = 1;
2155                 qpd->sh_mem_ape1_limit = 0;
2156         } else {
2157                 /*
2158                  * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
2159                  *                      SH_MEM_APE1_BASE[31:0], 0x0000 }
2160                  * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
2161                  *                      SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
2162                  * Verify that the base and size parameters can be
2163                  * represented in this format and convert them.
2164                  * Additionally restrict APE1 to user-mode addresses.
2165                  */
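                      /* Worked example with assumed (illustrative) values:
                       * base = 0x200000000 and size = 0x10000 give
                       * limit = 0x20000ffff; both pass the fixed-bits
                       * checks below and convert to sh_mem_ape1_base =
                       * sh_mem_ape1_limit = 0x20000.
                       */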
2166
2167                 uint64_t base = (uintptr_t)alternate_aperture_base;
2168                 uint64_t limit = base + alternate_aperture_size - 1;
2169
2170                 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
2171                    (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
2172                         retval = false;
2173                         goto out;
2174                 }
2175
2176                 qpd->sh_mem_ape1_base = base >> 16;
2177                 qpd->sh_mem_ape1_limit = limit >> 16;
2178         }
2179
2180         retval = dqm->asic_ops.set_cache_memory_policy(
2181                         dqm,
2182                         qpd,
2183                         default_policy,
2184                         alternate_policy,
2185                         alternate_aperture_base,
2186                         alternate_aperture_size);
2187
2188         if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
2189                 program_sh_mem_settings(dqm, qpd);
2190
2191         pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
2192                 qpd->sh_mem_config, qpd->sh_mem_ape1_base,
2193                 qpd->sh_mem_ape1_limit);
2194
2195 out:
2196         dqm_unlock(dqm);
2197         return retval;
2198 }
2199
2200 static int process_termination_nocpsch(struct device_queue_manager *dqm,
2201                 struct qcm_process_device *qpd)
2202 {
2203         struct queue *q;
2204         struct device_process_node *cur, *next_dpn;
2205         int retval = 0;
2206         bool found = false;
2207
2208         dqm_lock(dqm);
2209
2210         /* Clear all user mode queues */
2211         while (!list_empty(&qpd->queues_list)) {
2212                 struct mqd_manager *mqd_mgr;
2213                 int ret;
2214
2215                 q = list_first_entry(&qpd->queues_list, struct queue, list);
2216                 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2217                                 q->properties.type)];
2218                 ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
2219                 if (ret)
2220                         retval = ret;
2221                 dqm_unlock(dqm);
2222                 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
2223                 dqm_lock(dqm);
2224         }
2225
2226         /* Unregister process */
2227         list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
2228                 if (qpd == cur->qpd) {
2229                         list_del(&cur->list);
2230                         kfree(cur);
2231                         dqm->processes_count--;
2232                         found = true;
2233                         break;
2234                 }
2235         }
2236
2237         dqm_unlock(dqm);
2238
2239         /* Outside the DQM lock because under the DQM lock we can't do
2240          * reclaim or take other locks that others hold while reclaiming.
2241          */
2242         if (found)
2243                 kfd_dec_compute_active(dqm->dev);
2244
2245         return retval;
2246 }
2247
2248 static int get_wave_state(struct device_queue_manager *dqm,
2249                           struct queue *q,
2250                           void __user *ctl_stack,
2251                           u32 *ctl_stack_used_size,
2252                           u32 *save_area_used_size)
2253 {
2254         struct mqd_manager *mqd_mgr;
2255
2256         dqm_lock(dqm);
2257
2258         mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
2259
2260         if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
2261             q->properties.is_active || !q->device->kfd->cwsr_enabled ||
2262             !mqd_mgr->get_wave_state) {
2263                 dqm_unlock(dqm);
2264                 return -EINVAL;
2265         }
2266
2267         dqm_unlock(dqm);
2268
2269         /*
2270          * get_wave_state is outside the dqm lock to prevent circular locking
2271          * and the queue should be protected against destruction by the process
2272          * lock.
2273          */
2274         return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties,
2275                         ctl_stack, ctl_stack_used_size, save_area_used_size);
2276 }
2277
2278 static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
2279                         const struct queue *q,
2280                         u32 *mqd_size,
2281                         u32 *ctl_stack_size)
2282 {
2283         struct mqd_manager *mqd_mgr;
2284         enum KFD_MQD_TYPE mqd_type =
2285                         get_mqd_type_from_queue_type(q->properties.type);
2286
2287         dqm_lock(dqm);
2288         mqd_mgr = dqm->mqd_mgrs[mqd_type];
2289         *mqd_size = mqd_mgr->mqd_size;
2290         *ctl_stack_size = 0;
2291
2292         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
2293                 mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
2294
2295         dqm_unlock(dqm);
2296 }
2297
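     /*
      * Snapshots an inactive queue's MQD and control stack for CRIU:
      * returns -EINVAL if the queue is active or CWSR is disabled, and
      * -EOPNOTSUPP if the MQD manager has no checkpoint_mqd hook.
      */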
2298 static int checkpoint_mqd(struct device_queue_manager *dqm,
2299                           const struct queue *q,
2300                           void *mqd,
2301                           void *ctl_stack)
2302 {
2303         struct mqd_manager *mqd_mgr;
2304         int r = 0;
2305         enum KFD_MQD_TYPE mqd_type =
2306                         get_mqd_type_from_queue_type(q->properties.type);
2307
2308         dqm_lock(dqm);
2309
2310         if (q->properties.is_active || !q->device->kfd->cwsr_enabled) {
2311                 r = -EINVAL;
2312                 goto dqm_unlock;
2313         }
2314
2315         mqd_mgr = dqm->mqd_mgrs[mqd_type];
2316         if (!mqd_mgr->checkpoint_mqd) {
2317                 r = -EOPNOTSUPP;
2318                 goto dqm_unlock;
2319         }
2320
2321         mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);
2322
2323 dqm_unlock:
2324         dqm_unlock(dqm);
2325         return r;
2326 }
2327
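     /*
      * Tears down everything a terminating process still owns on this
      * device: kernel queues on the priv list, user-mode queues (SDMA
      * ids, MES or runlist mappings), the dqm->queues registration, and
      * finally the MQDs, which are freed outside the DQM lock.
      */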
2328 static int process_termination_cpsch(struct device_queue_manager *dqm,
2329                 struct qcm_process_device *qpd)
2330 {
2331         int retval;
2332         struct queue *q;
2333         struct kernel_queue *kq, *kq_next;
2334         struct mqd_manager *mqd_mgr;
2335         struct device_process_node *cur, *next_dpn;
2336         enum kfd_unmap_queues_filter filter =
2337                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
2338         bool found = false;
2339
2340         retval = 0;
2341
2342         dqm_lock(dqm);
2343
2344         /* Clean all kernel queues */
2345         list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
2346                 list_del(&kq->list);
2347                 decrement_queue_count(dqm, qpd, kq->queue);
2348                 qpd->is_debug = false;
2349                 dqm->total_queue_count--;
2350                 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
2351         }
2352
2353         /* Clear all user mode queues */
2354         list_for_each_entry(q, &qpd->queues_list, list) {
2355                 if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
2356                         deallocate_sdma_queue(dqm, q);
2357                 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
2358                         deallocate_sdma_queue(dqm, q);
2359
2360                 if (q->properties.is_active) {
2361                         decrement_queue_count(dqm, qpd, q);
2362
2363                         if (dqm->dev->kfd->shared_resources.enable_mes) {
2364                                 retval = remove_queue_mes(dqm, q, qpd);
2365                                 if (retval)
2366                                         pr_err("Failed to remove queue %d\n",
2367                                                 q->properties.queue_id);
2368                         }
2369                 }
2370
2371                 dqm->total_queue_count--;
2372         }
2373
2374         /* Unregister process */
2375         list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
2376                 if (qpd == cur->qpd) {
2377                         list_del(&cur->list);
2378                         kfree(cur);
2379                         dqm->processes_count--;
2380                         found = true;
2381                         break;
2382                 }
2383         }
2384
2385         if (!dqm->dev->kfd->shared_resources.enable_mes)
2386                 retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD);
2387
2388         if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
2389                 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
2390                 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
2391                 qpd->reset_wavefronts = false;
2392         }
2393
2394         /* Lastly, free mqd resources.
2395          * Do free_mqd() after dqm_unlock to avoid circular locking.
2396          */
2397         while (!list_empty(&qpd->queues_list)) {
2398                 q = list_first_entry(&qpd->queues_list, struct queue, list);
2399                 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2400                                 q->properties.type)];
2401                 list_del(&q->list);
2402                 qpd->queue_count--;
2403                 dqm_unlock(dqm);
2404                 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
2405                 dqm_lock(dqm);
2406         }
2407         dqm_unlock(dqm);
2408
2409         /* Outside the DQM lock because under the DQM lock we can't do
2410          * reclaim or take other locks that others hold while reclaiming.
2411          */
2412         if (found)
2413                 kfd_dec_compute_active(dqm->dev);
2414
2415         return retval;
2416 }
2417
2418 static int init_mqd_managers(struct device_queue_manager *dqm)
2419 {
2420         int i, j;
2421         struct mqd_manager *mqd_mgr;
2422
2423         for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
2424                 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
2425                 if (!mqd_mgr) {
2426                         pr_err("mqd manager [%d] initialization failed\n", i);
2427                         goto out_free;
2428                 }
2429                 dqm->mqd_mgrs[i] = mqd_mgr;
2430         }
2431
2432         return 0;
2433
2434 out_free:
2435         for (j = 0; j < i; j++) {
2436                 kfree(dqm->mqd_mgrs[j]);
2437                 dqm->mqd_mgrs[j] = NULL;
2438         }
2439
2440         return -ENOMEM;
2441 }
2442
2443 /* Allocate one HIQ MQD (HWS) and all SDMA MQDs in one contiguous chunk */
2444 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
2445 {
2446         int retval;
2447         struct kfd_node *dev = dqm->dev;
2448         struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
2449         uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
2450                 get_num_all_sdma_engines(dqm) *
2451                 dev->kfd->device_info.num_sdma_queues_per_engine +
2452                 (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
2453                 NUM_XCC(dqm->dev->xcc_mask));
2454
2455         retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
2456                 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
2457                 (void *)&(mem_obj->cpu_ptr), false);
2458
2459         return retval;
2460 }
2461
2462 struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
2463 {
2464         struct device_queue_manager *dqm;
2465
2466         pr_debug("Loading device queue manager\n");
2467
2468         dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
2469         if (!dqm)
2470                 return NULL;
2471
2472         switch (dev->adev->asic_type) {
2473         /* HWS is not available on Hawaii. */
2474         case CHIP_HAWAII:
2475         /* HWS depends on CWSR for timely dequeue. CWSR is not
2476          * available on Tonga.
2477          *
2478          * FIXME: This argument also applies to Kaveri.
2479          */
2480         case CHIP_TONGA:
2481                 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
2482                 break;
2483         default:
2484                 dqm->sched_policy = sched_policy;
2485                 break;
2486         }
2487
2488         dqm->dev = dev;
2489         switch (dqm->sched_policy) {
2490         case KFD_SCHED_POLICY_HWS:
2491         case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
2492                 /* initialize dqm for cp scheduling */
2493                 dqm->ops.create_queue = create_queue_cpsch;
2494                 dqm->ops.initialize = initialize_cpsch;
2495                 dqm->ops.start = start_cpsch;
2496                 dqm->ops.stop = stop_cpsch;
2497                 dqm->ops.pre_reset = pre_reset;
2498                 dqm->ops.destroy_queue = destroy_queue_cpsch;
2499                 dqm->ops.update_queue = update_queue;
2500                 dqm->ops.register_process = register_process;
2501                 dqm->ops.unregister_process = unregister_process;
2502                 dqm->ops.uninitialize = uninitialize;
2503                 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
2504                 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
2505                 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
2506                 dqm->ops.process_termination = process_termination_cpsch;
2507                 dqm->ops.evict_process_queues = evict_process_queues_cpsch;
2508                 dqm->ops.restore_process_queues = restore_process_queues_cpsch;
2509                 dqm->ops.get_wave_state = get_wave_state;
2510                 dqm->ops.reset_queues = reset_queues_cpsch;
2511                 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
2512                 dqm->ops.checkpoint_mqd = checkpoint_mqd;
2513                 break;
2514         case KFD_SCHED_POLICY_NO_HWS:
2515                 /* initialize dqm for no cp scheduling */
2516                 dqm->ops.start = start_nocpsch;
2517                 dqm->ops.stop = stop_nocpsch;
2518                 dqm->ops.pre_reset = pre_reset;
2519                 dqm->ops.create_queue = create_queue_nocpsch;
2520                 dqm->ops.destroy_queue = destroy_queue_nocpsch;
2521                 dqm->ops.update_queue = update_queue;
2522                 dqm->ops.register_process = register_process;
2523                 dqm->ops.unregister_process = unregister_process;
2524                 dqm->ops.initialize = initialize_nocpsch;
2525                 dqm->ops.uninitialize = uninitialize;
2526                 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
2527                 dqm->ops.process_termination = process_termination_nocpsch;
2528                 dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
2529                 dqm->ops.restore_process_queues =
2530                         restore_process_queues_nocpsch;
2531                 dqm->ops.get_wave_state = get_wave_state;
2532                 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
2533                 dqm->ops.checkpoint_mqd = checkpoint_mqd;
2534                 break;
2535         default:
2536                 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
2537                 goto out_free;
2538         }
2539
2540         switch (dev->adev->asic_type) {
2541         case CHIP_CARRIZO:
2542                 device_queue_manager_init_vi(&dqm->asic_ops);
2543                 break;
2544
2545         case CHIP_KAVERI:
2546                 device_queue_manager_init_cik(&dqm->asic_ops);
2547                 break;
2548
2549         case CHIP_HAWAII:
2550                 device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
2551                 break;
2552
2553         case CHIP_TONGA:
2554         case CHIP_FIJI:
2555         case CHIP_POLARIS10:
2556         case CHIP_POLARIS11:
2557         case CHIP_POLARIS12:
2558         case CHIP_VEGAM:
2559                 device_queue_manager_init_vi_tonga(&dqm->asic_ops);
2560                 break;
2561
2562         default:
2563                 if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
2564                         device_queue_manager_init_v11(&dqm->asic_ops);
2565                 else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
2566                         device_queue_manager_init_v10_navi10(&dqm->asic_ops);
2567                 else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
2568                         device_queue_manager_init_v9(&dqm->asic_ops);
2569                 else {
2570                         WARN(1, "Unexpected ASIC family %u",
2571                              dev->adev->asic_type);
2572                         goto out_free;
2573                 }
2574         }
2575
2576         if (init_mqd_managers(dqm))
2577                 goto out_free;
2578
2579         if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
2580                 pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
2581                 goto out_free;
2582         }
2583
2584         if (!dqm->ops.initialize(dqm)) {
2585                 init_waitqueue_head(&dqm->destroy_wait);
2586                 return dqm;
2587         }
2588
2589 out_free:
2590         kfree(dqm);
2591         return NULL;
2592 }
2593
2594 static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
2595                                     struct kfd_mem_obj *mqd)
2596 {
2597         WARN(!mqd, "No hiq sdma mqd trunk to free");
2598
2599         amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
2600 }
2601
2602 void device_queue_manager_uninit(struct device_queue_manager *dqm)
2603 {
2604         dqm->ops.stop(dqm);
2605         dqm->ops.uninitialize(dqm);
2606         if (!dqm->dev->kfd->shared_resources.enable_mes)
2607                 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
2608         kfree(dqm);
2609 }
2610
2611 int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
2612 {
2613         struct kfd_process_device *pdd;
2614         struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
2615         int ret = 0;
2616
2617         if (!p)
2618                 return -EINVAL;
2619         WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
2620         pdd = kfd_get_process_device_data(dqm->dev, p);
2621         if (pdd)
2622                 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
2623         kfd_unref_process(p);
2624
2625         return ret;
2626 }
2627
2628 static void kfd_process_hw_exception(struct work_struct *work)
2629 {
2630         struct device_queue_manager *dqm = container_of(work,
2631                         struct device_queue_manager, hw_exception_work);
2632         amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
2633 }
2634
2635 int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
2636                                 struct qcm_process_device *qpd)
2637 {
2638         int r;
2639         int updated_vmid_mask;
2640
2641         if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
2642                 pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
2643                 return -EINVAL;
2644         }
2645
2646         dqm_lock(dqm);
2647
2648         if (dqm->trap_debug_vmid != 0) {
2649                 pr_err("Trap debug id already reserved\n");
2650                 r = -EBUSY;
2651                 goto out_unlock;
2652         }
2653
2654         r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
2655                         USE_DEFAULT_GRACE_PERIOD, false);
2656         if (r)
2657                 goto out_unlock;
2658
2659         updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
2660         updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd);
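              /* Illustrative example (bitmap value assumed): with a compute
               * VMID bitmap of 0xff00 and last_vmid_kfd = 15, the mask
               * becomes 0x7f00, so HWS stops handing the reserved VMID to
               * compute processes until release_debug_trap_vmid() restores it.
               */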
2661
2662         dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
2663         dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd;
2664         r = set_sched_resources(dqm);
2665         if (r)
2666                 goto out_unlock;
2667
2668         r = map_queues_cpsch(dqm);
2669         if (r)
2670                 goto out_unlock;
2671
2672         pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid);
2673
2674 out_unlock:
2675         dqm_unlock(dqm);
2676         return r;
2677 }
2678
2679 /*
2680  * Releases vmid for the trap debugger
2681  */
2682 int release_debug_trap_vmid(struct device_queue_manager *dqm,
2683                         struct qcm_process_device *qpd)
2684 {
2685         int r;
2686         int updated_vmid_mask;
2687         uint32_t trap_debug_vmid;
2688
2689         if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
2690                 pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
2691                 return -EINVAL;
2692         }
2693
2694         dqm_lock(dqm);
2695         trap_debug_vmid = dqm->trap_debug_vmid;
2696         if (dqm->trap_debug_vmid == 0) {
2697                 pr_err("Trap debug id is not reserved\n");
2698                 r = -EINVAL;
2699                 goto out_unlock;
2700         }
2701
2702         r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
2703                         USE_DEFAULT_GRACE_PERIOD, false);
2704         if (r)
2705                 goto out_unlock;
2706
2707         updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
2708         updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd);
2709
2710         dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
2711         dqm->trap_debug_vmid = 0;
2712         r = set_sched_resources(dqm);
2713         if (r)
2714                 goto out_unlock;
2715
2716         r = map_queues_cpsch(dqm);
2717         if (r)
2718                 goto out_unlock;
2719
2720         pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid);
2721
2722 out_unlock:
2723         dqm_unlock(dqm);
2724         return r;
2725 }
2726
2727 #define QUEUE_NOT_FOUND         -1
2728 /* mark every queue id in the array as invalid */
2729 static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids)
2730 {
2731         int i;
2732
2733         for (i = 0; i < num_queues; i++)
2734                 queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK;
2735 }
2736
2737 /* find queue index in array */
2738 static int q_array_get_index(unsigned int queue_id,
2739                 uint32_t num_queues,
2740                 uint32_t *queue_ids)
2741 {
2742         int i;
2743
2744         for (i = 0; i < num_queues; i++)
2745                 if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK))
2746                         return i;
2747
2748         return QUEUE_NOT_FOUND;
2749 }
2750
2751 struct copy_context_work_handler_workarea {
2752         struct work_struct copy_context_work;
2753         struct kfd_process *p;
2754 };
2755
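     /*
      * Runs from a workqueue: attach to the process's mm with
      * kthread_use_mm() so get_wave_state() can copy each queue's control
      * stack into the user-mode context save/restore area, then detach
      * and drop the mm reference.
      */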
2756 static void copy_context_work_handler(struct work_struct *work)
2757 {
2758         struct copy_context_work_handler_workarea *workarea;
2759         struct mqd_manager *mqd_mgr;
2760         struct queue *q;
2761         struct mm_struct *mm;
2762         struct kfd_process *p;
2763         uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
2764         int i;
2765
2766         workarea = container_of(work,
2767                         struct copy_context_work_handler_workarea,
2768                         copy_context_work);
2769
2770         p = workarea->p;
2771         mm = get_task_mm(p->lead_thread);
2772
2773         if (!mm)
2774                 return;
2775
2776         kthread_use_mm(mm);
2777         for (i = 0; i < p->n_pdds; i++) {
2778                 struct kfd_process_device *pdd = p->pdds[i];
2779                 struct device_queue_manager *dqm = pdd->dev->dqm;
2780                 struct qcm_process_device *qpd = &pdd->qpd;
2781
2782                 list_for_each_entry(q, &qpd->queues_list, list) {
2783                         mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
2784
2785                         /* We ignore the return value from get_wave_state
2786                          * because
2787                          * i) right now, it always returns 0, and
2788                          * ii) if we hit an error, we would continue to the
2789                          *      next queue anyway.
2790                          */
2791                         mqd_mgr->get_wave_state(mqd_mgr,
2792                                         q->mqd,
2793                                         &q->properties,
2794                                         (void __user *) q->properties.ctx_save_restore_area_address,
2795                                         &tmp_ctl_stack_used_size,
2796                                         &tmp_save_area_used_size);
2797                 }
2798         }
2799         kthread_unuse_mm(mm);
2800         mmput(mm);
2801 }
2802
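/*
 * Copy the user-supplied array of queue ids into a kernel buffer.
 * Returns NULL if no array was supplied and an ERR_PTR on allocation
 * or copy failure; the caller owns (and must kfree) the result.
 */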
2803 static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array)
2804 {
2805         size_t array_size = num_queues * sizeof(uint32_t);
2806         uint32_t *queue_ids = NULL;
2807
2808         if (!usr_queue_id_array)
2809                 return NULL;
2810
2811         queue_ids = kzalloc(array_size, GFP_KERNEL);
2812         if (!queue_ids)
2813                 return ERR_PTR(-ENOMEM);
2814
        if (copy_from_user(queue_ids, usr_queue_id_array, array_size)) {
                kfree(queue_ids);
                return ERR_PTR(-EFAULT);
        }
2817
2818         return queue_ids;
2819 }
2820
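/*
 * Resume queues of @p that were suspended for debugging.  If a queue-id
 * array is supplied, only the listed queues are resumed and their
 * entries are updated with the status flags described above; otherwise
 * every queue of the process is resumed.  Returns the number of queues
 * successfully resumed.
 */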
2821 int resume_queues(struct kfd_process *p,
2822                 uint32_t num_queues,
2823                 uint32_t *usr_queue_id_array)
2824 {
2825         uint32_t *queue_ids = NULL;
2826         int total_resumed = 0;
2827         int i;
2828
2829         if (usr_queue_id_array) {
2830                 queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
2831
2832                 if (IS_ERR(queue_ids))
2833                         return PTR_ERR(queue_ids);
2834
2835                 /* mask all queues as invalid.  unmask per successful request */
2836                 q_array_invalidate(num_queues, queue_ids);
2837         }
2838
2839         for (i = 0; i < p->n_pdds; i++) {
2840                 struct kfd_process_device *pdd = p->pdds[i];
2841                 struct device_queue_manager *dqm = pdd->dev->dqm;
2842                 struct qcm_process_device *qpd = &pdd->qpd;
2843                 struct queue *q;
2844                 int r, per_device_resumed = 0;
2845
2846                 dqm_lock(dqm);
2847
                /* unmask queues that resumed or were already resumed as valid */
2849                 list_for_each_entry(q, &qpd->queues_list, list) {
2850                         int q_idx = QUEUE_NOT_FOUND;
2851
2852                         if (queue_ids)
2853                                 q_idx = q_array_get_index(
2854                                                 q->properties.queue_id,
2855                                                 num_queues,
2856                                                 queue_ids);
2857
2858                         if (!queue_ids || q_idx != QUEUE_NOT_FOUND) {
2859                                 int err = resume_single_queue(dqm, &pdd->qpd, q);
2860
2861                                 if (queue_ids) {
2862                                         if (!err) {
2863                                                 queue_ids[q_idx] &=
2864                                                         ~KFD_DBG_QUEUE_INVALID_MASK;
2865                                         } else {
2866                                                 queue_ids[q_idx] |=
2867                                                         KFD_DBG_QUEUE_ERROR_MASK;
2868                                                 break;
2869                                         }
2870                                 }
2871
2872                                 if (dqm->dev->kfd->shared_resources.enable_mes) {
2873                                         wake_up_all(&dqm->destroy_wait);
2874                                         if (!err)
2875                                                 total_resumed++;
2876                                 } else {
2877                                         per_device_resumed++;
2878                                 }
2879                         }
2880                 }
2881
2882                 if (!per_device_resumed) {
2883                         dqm_unlock(dqm);
2884                         continue;
2885                 }
2886
2887                 r = execute_queues_cpsch(dqm,
2888                                         KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
2889                                         0,
2890                                         USE_DEFAULT_GRACE_PERIOD);
2891                 if (r) {
2892                         pr_err("Failed to resume process queues\n");
2893                         if (queue_ids) {
2894                                 list_for_each_entry(q, &qpd->queues_list, list) {
2895                                         int q_idx = q_array_get_index(
2896                                                         q->properties.queue_id,
2897                                                         num_queues,
2898                                                         queue_ids);
2899
2900                                         /* mask queue as error on resume fail */
2901                                         if (q_idx != QUEUE_NOT_FOUND)
2902                                                 queue_ids[q_idx] |=
2903                                                         KFD_DBG_QUEUE_ERROR_MASK;
2904                                 }
2905                         }
2906                 } else {
2907                         wake_up_all(&dqm->destroy_wait);
2908                         total_resumed += per_device_resumed;
2909                 }
2910
2911                 dqm_unlock(dqm);
2912         }
2913
2914         if (queue_ids) {
2915                 if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
2916                                 num_queues * sizeof(uint32_t)))
2917                         pr_err("copy_to_user failed on queue resume\n");
2918
2919                 kfree(queue_ids);
2920         }
2921
2922         return total_resumed;
2923 }
2924
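/*
 * Suspend the queues listed in @usr_queue_id_array: each queue is taken
 * off the hardware via suspend_single_queue() and, on non-MES devices,
 * a new runlist is submitted with @grace_period.  Exceptions in
 * @exception_clear_mask are cleared on the suspended queues, and their
 * wave state is copied to user space by copy_context_work_handler().
 * Per-queue status is reported back through the id array; returns the
 * number of queues suspended.
 */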
2925 int suspend_queues(struct kfd_process *p,
2926                         uint32_t num_queues,
2927                         uint32_t grace_period,
2928                         uint64_t exception_clear_mask,
2929                         uint32_t *usr_queue_id_array)
2930 {
2931         uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
2932         int total_suspended = 0;
2933         int i;
2934
2935         if (IS_ERR(queue_ids))
2936                 return PTR_ERR(queue_ids);
2937
        /* mask all queues as invalid.  unmask on successful request */
2939         q_array_invalidate(num_queues, queue_ids);
2940
2941         for (i = 0; i < p->n_pdds; i++) {
2942                 struct kfd_process_device *pdd = p->pdds[i];
2943                 struct device_queue_manager *dqm = pdd->dev->dqm;
2944                 struct qcm_process_device *qpd = &pdd->qpd;
2945                 struct queue *q;
2946                 int r, per_device_suspended = 0;
2947
2948                 mutex_lock(&p->event_mutex);
2949                 dqm_lock(dqm);
2950
                /* unmask queues that suspended or were already suspended */
2952                 list_for_each_entry(q, &qpd->queues_list, list) {
2953                         int q_idx = q_array_get_index(q->properties.queue_id,
2954                                                         num_queues,
2955                                                         queue_ids);
2956
2957                         if (q_idx != QUEUE_NOT_FOUND) {
2958                                 int err = suspend_single_queue(dqm, pdd, q);
2959                                 bool is_mes = dqm->dev->kfd->shared_resources.enable_mes;
2960
2961                                 if (!err) {
2962                                         queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK;
2963                                         if (exception_clear_mask && is_mes)
2964                                                 q->properties.exception_status &=
2965                                                         ~exception_clear_mask;
2966
2967                                         if (is_mes)
2968                                                 total_suspended++;
2969                                         else
2970                                                 per_device_suspended++;
2971                                 } else if (err != -EBUSY) {
2972                                         r = err;
2973                                         queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
2974                                         break;
2975                                 }
2976                         }
2977                 }
2978
2979                 if (!per_device_suspended) {
2980                         dqm_unlock(dqm);
2981                         mutex_unlock(&p->event_mutex);
2982                         if (total_suspended)
2983                                 amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev);
2984                         continue;
2985                 }
2986
2987                 r = execute_queues_cpsch(dqm,
2988                         KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
2989                         grace_period);
2990
2991                 if (r)
2992                         pr_err("Failed to suspend process queues.\n");
2993                 else
2994                         total_suspended += per_device_suspended;
2995
2996                 list_for_each_entry(q, &qpd->queues_list, list) {
2997                         int q_idx = q_array_get_index(q->properties.queue_id,
2998                                                 num_queues, queue_ids);
2999
3000                         if (q_idx == QUEUE_NOT_FOUND)
3001                                 continue;
3002
3003                         /* mask queue as error on suspend fail */
3004                         if (r)
3005                                 queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
3006                         else if (exception_clear_mask)
3007                                 q->properties.exception_status &=
3008                                                         ~exception_clear_mask;
3009                 }
3010
3011                 dqm_unlock(dqm);
3012                 mutex_unlock(&p->event_mutex);
3013                 amdgpu_device_flush_hdp(dqm->dev->adev, NULL);
3014         }
3015
3016         if (total_suspended) {
3017                 struct copy_context_work_handler_workarea copy_context_worker;
3018
3019                 INIT_WORK_ONSTACK(
3020                                 &copy_context_worker.copy_context_work,
3021                                 copy_context_work_handler);
3022
3023                 copy_context_worker.p = p;
3024
3025                 schedule_work(&copy_context_worker.copy_context_work);
3026
3028                 flush_work(&copy_context_worker.copy_context_work);
3029                 destroy_work_on_stack(&copy_context_worker.copy_context_work);
3030         }
3031
3032         if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
3033                         num_queues * sizeof(uint32_t)))
3034                 pr_err("copy_to_user failed on queue suspend\n");
3035
3036         kfree(queue_ids);
3037
3038         return total_suspended;
3039 }
3040
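/*
 * Translate the internal queue type/format into the KFD_IOC_QUEUE_TYPE_*
 * value reported in debugger queue snapshots.
 */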
3041 static uint32_t set_queue_type_for_user(struct queue_properties *q_props)
3042 {
3043         switch (q_props->type) {
3044         case KFD_QUEUE_TYPE_COMPUTE:
3045                 return q_props->format == KFD_QUEUE_FORMAT_PM4
3046                                         ? KFD_IOC_QUEUE_TYPE_COMPUTE
3047                                         : KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
3048         case KFD_QUEUE_TYPE_SDMA:
3049                 return KFD_IOC_QUEUE_TYPE_SDMA;
3050         case KFD_QUEUE_TYPE_SDMA_XGMI:
3051                 return KFD_IOC_QUEUE_TYPE_SDMA_XGMI;
3052         default:
3053                 WARN_ONCE(true, "queue type not recognized!");
3054                 return 0xffffffff;
        }
3056 }
3057
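/*
 * Fill one debugger queue-snapshot entry from the queue's properties and
 * clear the exceptions in @exception_clear_mask now that they have been
 * reported.
 */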
3058 void set_queue_snapshot_entry(struct queue *q,
3059                               uint64_t exception_clear_mask,
3060                               struct kfd_queue_snapshot_entry *qss_entry)
3061 {
3062         qss_entry->ring_base_address = q->properties.queue_address;
3063         qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr;
3064         qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr;
3065         qss_entry->ctx_save_restore_address =
3066                                 q->properties.ctx_save_restore_area_address;
3067         qss_entry->ctx_save_restore_area_size =
3068                                 q->properties.ctx_save_restore_area_size;
3069         qss_entry->exception_status = q->properties.exception_status;
3070         qss_entry->queue_id = q->properties.queue_id;
3071         qss_entry->gpu_id = q->device->id;
3072         qss_entry->ring_size = (uint32_t)q->properties.queue_size;
3073         qss_entry->queue_type = set_queue_type_for_user(&q->properties);
3074         q->properties.exception_status &= ~exception_clear_mask;
3075 }
3076
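/*
 * Take the DQM lock and unmap all queues so the debugger can update
 * queue state while nothing is on the hardware.  On success the lock
 * stays held and is released by debug_map_and_unlock().  A no-op on
 * devices without per-VMID debug support.
 */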
3077 int debug_lock_and_unmap(struct device_queue_manager *dqm)
3078 {
3079         int r;
3080
3081         if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
3082                 pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
3083                 return -EINVAL;
3084         }
3085
3086         if (!kfd_dbg_is_per_vmid_supported(dqm->dev))
3087                 return 0;
3088
3089         dqm_lock(dqm);
3090
3091         r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false);
3092         if (r)
3093                 dqm_unlock(dqm);
3094
3095         return r;
3096 }
3097
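/*
 * Counterpart of debug_lock_and_unmap(): re-map the queues and drop the
 * DQM lock taken there.
 */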
3098 int debug_map_and_unlock(struct device_queue_manager *dqm)
3099 {
3100         int r;
3101
3102         if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
3103                 pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
3104                 return -EINVAL;
3105         }
3106
3107         if (!kfd_dbg_is_per_vmid_supported(dqm->dev))
3108                 return 0;
3109
3110         r = map_queues_cpsch(dqm);
3111
3112         dqm_unlock(dqm);
3113
3114         return r;
3115 }
3116
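/*
 * Convenience wrapper performing a full unmap/re-map cycle, i.e.
 * debug_lock_and_unmap() immediately followed by debug_map_and_unlock(),
 * so that queue state already updated by the debugger takes effect.
 */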
3117 int debug_refresh_runlist(struct device_queue_manager *dqm)
3118 {
3119         int r = debug_lock_and_unmap(dqm);
3120
3121         if (r)
3122                 return r;
3123
3124         return debug_map_and_unlock(dqm);
3125 }
3126
3127 #if defined(CONFIG_DEBUG_FS)
3128
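/*
 * Print register (address, value) pairs, starting a new line whenever
 * the address is not contiguous with the previous one and printing at
 * most eight values per line, e.g. (values are illustrative):
 *
 *     00000800: 00000001 00000002 00000003 ...
 */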
3129 static void seq_reg_dump(struct seq_file *m,
3130                          uint32_t (*dump)[2], uint32_t n_regs)
3131 {
3132         uint32_t i, count;
3133
3134         for (i = 0, count = 0; i < n_regs; i++) {
3135                 if (count == 0 ||
3136                     dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
3137                         seq_printf(m, "%s    %08x: %08x",
3138                                    i ? "\n" : "",
3139                                    dump[i][0], dump[i][1]);
3140                         count = 7;
3141                 } else {
3142                         seq_printf(m, " %08x", dump[i][1]);
3143                         count--;
3144                 }
3145         }
3146
3147         seq_puts(m, "\n");
3148 }
3149
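/*
 * debugfs dump of HQD registers: the HIQ and every CP queue reserved
 * for KFD (per cp_queue_bitmap) on each XCC instance, followed by every
 * SDMA RLC queue on this node's SDMA engines.
 */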
3150 int dqm_debugfs_hqds(struct seq_file *m, void *data)
3151 {
3152         struct device_queue_manager *dqm = data;
3153         uint32_t xcc_mask = dqm->dev->xcc_mask;
3154         uint32_t (*dump)[2], n_regs;
3155         int pipe, queue;
3156         int r = 0, xcc_id;
3157         uint32_t sdma_engine_start;
3158
3159         if (!dqm->sched_running) {
3160                 seq_puts(m, " Device is stopped\n");
3161                 return 0;
3162         }
3163
3164         for_each_inst(xcc_id, xcc_mask) {
3165                 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
3166                                                 KFD_CIK_HIQ_PIPE,
3167                                                 KFD_CIK_HIQ_QUEUE, &dump,
3168                                                 &n_regs, xcc_id);
3169                 if (!r) {
3170                         seq_printf(
3171                                 m,
3172                                 "   Inst %d, HIQ on MEC %d Pipe %d Queue %d\n",
3173                                 xcc_id,
3174                                 KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1,
3175                                 KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm),
3176                                 KFD_CIK_HIQ_QUEUE);
3177                         seq_reg_dump(m, dump, n_regs);
3178
3179                         kfree(dump);
3180                 }
3181
3182                 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
3183                         int pipe_offset = pipe * get_queues_per_pipe(dqm);
3184
3185                         for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
3186                                 if (!test_bit(pipe_offset + queue,
3187                                       dqm->dev->kfd->shared_resources.cp_queue_bitmap))
3188                                         continue;
3189
3190                                 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
3191                                                                 pipe, queue,
3192                                                                 &dump, &n_regs,
3193                                                                 xcc_id);
3194                                 if (r)
3195                                         break;
3196
3197                                 seq_printf(m,
3198                                            " Inst %d,  CP Pipe %d, Queue %d\n",
3199                                            xcc_id, pipe, queue);
3200                                 seq_reg_dump(m, dump, n_regs);
3201
3202                                 kfree(dump);
3203                         }
3204                 }
3205         }
3206
3207         sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
3208         for (pipe = sdma_engine_start;
3209              pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm));
3210              pipe++) {
3211                 for (queue = 0;
3212                      queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
3213                      queue++) {
3214                         r = dqm->dev->kfd2kgd->hqd_sdma_dump(
3215                                 dqm->dev->adev, pipe, queue, &dump, &n_regs);
3216                         if (r)
3217                                 break;
3218
3219                         seq_printf(m, "  SDMA Engine %d, RLC %d\n",
3220                                   pipe, queue);
3221                         seq_reg_dump(m, dump, n_regs);
3222
3223                         kfree(dump);
3224                 }
3225         }
3226
3227         return r;
3228 }
3229
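/*
 * debugfs test hook: pm_debugfs_hang_hws() submits a garbage packet to
 * the HIQ and the subsequent runlist submission is intended to hang the
 * hardware scheduler, so that GPU reset handling can be exercised.
 */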
3230 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
3231 {
3232         int r = 0;
3233
3234         dqm_lock(dqm);
3235         r = pm_debugfs_hang_hws(&dqm->packet_mgr);
3236         if (r) {
3237                 dqm_unlock(dqm);
3238                 return r;
3239         }
3240         dqm->active_runlist = true;
3241         r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
3242                                 0, USE_DEFAULT_GRACE_PERIOD);
3243         dqm_unlock(dqm);
3244
3245         return r;
3246 }
3247
3248 #endif