1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 /**
25  * DOC: Overview
26  *
27  * The GPU scheduler provides entities which allow userspace to push jobs
28  * into software queues which are then scheduled on a hardware run queue.
29  * The software queues have a priority among them. The scheduler selects
30  * entities from the run queue in FIFO order. The scheduler also provides
31  * dependency handling between jobs. The driver is expected to provide
32  * callback functions for backend operations, such as submitting a job to
33  * the hardware run queue or returning the dependencies of a job.
34  *
35  * The organisation of the scheduler is the following:
36  *
37  * 1. Each hw run queue has one scheduler
38  * 2. Each scheduler has multiple run queues with different priorities
39  *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
40  * 3. Each scheduler run queue has a queue of entities to schedule
41  * 4. Entities themselves maintain a queue of jobs that will be scheduled on
42  *    the hardware.
43  *
44  * The jobs in an entity are always scheduled in the order in which they were pushed.
45  */
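/*
 * A rough, illustrative sketch of the driver-side flow; my_ops, my_run_job,
 * my_timedout_job, my_free_job, ring, job, entity and owner are made-up
 * names, and the exact callback signatures live in <drm/gpu_scheduler.h>:
 *
 *	static const struct drm_sched_backend_ops my_ops = {
 *		.run_job	= my_run_job,
 *		.timedout_job	= my_timedout_job,
 *		.free_job	= my_free_job,
 *	};
 *
 *	// one scheduler per hardware run queue
 *	drm_sched_init(&ring->sched, &my_ops, hw_submission, hang_limit,
 *		       timeout, ring->name);
 *
 *	// per job: bind it to an entity, then push it to the entity's queue
 *	drm_sched_job_init(&job->base, entity, owner);
 *	drm_sched_entity_push_job(&job->base, entity);
 */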
46
47 #include <linux/kthread.h>
48 #include <linux/wait.h>
49 #include <linux/sched.h>
50 #include <linux/completion.h>
51 #include <uapi/linux/sched/types.h>
52
53 #include <drm/drm_print.h>
54 #include <drm/gpu_scheduler.h>
55 #include <drm/spsc_queue.h>
56
57 #define CREATE_TRACE_POINTS
58 #include "gpu_scheduler_trace.h"
59
60 #define to_drm_sched_job(sched_job)             \
61                 container_of((sched_job), struct drm_sched_job, queue_node)
62
63 static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
64
65 /**
66  * drm_sched_rq_init - initialize a given run queue struct
67  *
68  * @sched: scheduler instance
69  * @rq: scheduler run queue
70  * Initializes a scheduler runqueue.
71  */
72 static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
73                               struct drm_sched_rq *rq)
74 {
75         spin_lock_init(&rq->lock);
76         INIT_LIST_HEAD(&rq->entities);
77         rq->current_entity = NULL;
78         rq->sched = sched;
79 }
80
81 /**
82  * drm_sched_rq_add_entity - add an entity
83  *
84  * @rq: scheduler run queue
85  * @entity: scheduler entity
86  *
87  * Adds a scheduler entity to the run queue.
88  */
89 void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
90                              struct drm_sched_entity *entity)
91 {
92         if (!list_empty(&entity->list))
93                 return;
94         spin_lock(&rq->lock);
95         list_add_tail(&entity->list, &rq->entities);
96         spin_unlock(&rq->lock);
97 }
98
99 /**
100  * drm_sched_rq_remove_entity - remove an entity
101  *
102  * @rq: scheduler run queue
103  * @entity: scheduler entity
104  *
105  * Removes a scheduler entity from the run queue.
106  */
107 void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
108                                 struct drm_sched_entity *entity)
109 {
110         if (list_empty(&entity->list))
111                 return;
112         spin_lock(&rq->lock);
113         list_del_init(&entity->list);
114         if (rq->current_entity == entity)
115                 rq->current_entity = NULL;
116         spin_unlock(&rq->lock);
117 }
118
119 /**
120  * drm_sched_rq_select_entity - Select an entity which could provide a job to run
121  *
122  * @rq: scheduler run queue to check.
123  *
124  * Try to find a ready entity, returns NULL if none found.
125  */
126 static struct drm_sched_entity *
127 drm_sched_rq_select_entity(struct drm_sched_rq *rq)
128 {
129         struct drm_sched_entity *entity;
130
131         spin_lock(&rq->lock);
132
133         entity = rq->current_entity;
134         if (entity) {
135                 list_for_each_entry_continue(entity, &rq->entities, list) {
136                         if (drm_sched_entity_is_ready(entity)) {
137                                 rq->current_entity = entity;
138                                 reinit_completion(&entity->entity_idle);
139                                 spin_unlock(&rq->lock);
140                                 return entity;
141                         }
142                 }
143         }
144
145         list_for_each_entry(entity, &rq->entities, list) {
147                 if (drm_sched_entity_is_ready(entity)) {
148                         rq->current_entity = entity;
149                         reinit_completion(&entity->entity_idle);
150                         spin_unlock(&rq->lock);
151                         return entity;
152                 }
153
154                 if (entity == rq->current_entity)
155                         break;
156         }
157
158         spin_unlock(&rq->lock);
159
160         return NULL;
161 }
162
163 /**
164  * drm_sched_dependency_optimized - check whether a dependency fence can be optimized
165  *
166  * @fence: the dependency fence
167  * @entity: the entity which depends on the above fence
168  *
169  * Returns true if the dependency can be optimized and false otherwise
170  */
171 bool drm_sched_dependency_optimized(struct dma_fence *fence,
172                                     struct drm_sched_entity *entity)
173 {
174         struct drm_gpu_scheduler *sched = entity->rq->sched;
175         struct drm_sched_fence *s_fence;
176
177         if (!fence || dma_fence_is_signaled(fence))
178                 return false;
179         if (fence->context == entity->fence_context)
180                 return true;
181         s_fence = to_drm_sched_fence(fence);
182         if (s_fence && s_fence->sched == sched)
183                 return true;
184
185         return false;
186 }
187 EXPORT_SYMBOL(drm_sched_dependency_optimized);
188
189 /**
190  * drm_sched_start_timeout - start timeout for reset worker
191  *
192  * @sched: scheduler instance to start the worker for
193  *
194  * Start the timeout for the given scheduler.
195  */
196 static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
197 {
198         if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
199             !list_empty(&sched->ring_mirror_list))
200                 schedule_delayed_work(&sched->work_tdr, sched->timeout);
201 }
202
203 /**
204  * drm_sched_fault - immediately start timeout handler
205  *
206  * @sched: scheduler where the timeout handling should be started.
207  *
208  * Start timeout handling immediately when the driver detects a hardware fault.
209  */
210 void drm_sched_fault(struct drm_gpu_scheduler *sched)
211 {
212         mod_delayed_work(system_wq, &sched->work_tdr, 0);
213 }
214 EXPORT_SYMBOL(drm_sched_fault);
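/*
 * Illustrative sketch only; my_fault_irq and struct my_ring are made-up
 * driver names. A fault interrupt handler can use drm_sched_fault() to kick
 * timeout handling immediately instead of waiting for the job timeout:
 *
 *	static irqreturn_t my_fault_irq(int irq, void *arg)
 *	{
 *		struct my_ring *ring = arg;
 *
 *		drm_sched_fault(&ring->sched);
 *		return IRQ_HANDLED;
 *	}
 */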
215
216 /**
217  * drm_sched_suspend_timeout - Suspend scheduler job timeout
218  *
219  * @sched: scheduler instance for which to suspend the timeout
220  *
221  * Suspend the delayed work timeout for the scheduler. This is done by
222  * modifying the delayed work timeout to an arbitrarily large value,
223  * MAX_SCHEDULE_TIMEOUT in this case. Note that this function can be
224  * called from an IRQ context.
225  *
226  * Returns the time remaining on the timeout if it was still pending,
227  * otherwise the scheduler's configured timeout.
228  */
229 unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
230 {
231         unsigned long sched_timeout, now = jiffies;
232
233         sched_timeout = sched->work_tdr.timer.expires;
234
235         /*
236          * Modify the timeout to an arbitrarily large value. This also prevents
237          * the timeout from being restarted when new submissions arrive.
238          */
239         if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
240                         && time_after(sched_timeout, now))
241                 return sched_timeout - now;
242         else
243                 return sched->timeout;
244 }
245 EXPORT_SYMBOL(drm_sched_suspend_timeout);
246
247 /**
248  * drm_sched_resume_timeout - Resume scheduler job timeout
249  *
250  * @sched: scheduler instance for which to resume the timeout
251  * @remaining: remaining timeout
252  *
253  * Resume the delayed work timeout for the scheduler. Note that
254  * this function can be called from an IRQ context.
255  */
256 void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
257                 unsigned long remaining)
258 {
259         unsigned long flags;
260
261         spin_lock_irqsave(&sched->job_list_lock, flags);
262
263         if (list_empty(&sched->ring_mirror_list))
264                 cancel_delayed_work(&sched->work_tdr);
265         else
266                 mod_delayed_work(system_wq, &sched->work_tdr, remaining);
267
268         spin_unlock_irqrestore(&sched->job_list_lock, flags);
269 }
270 EXPORT_SYMBOL(drm_sched_resume_timeout);
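/*
 * Illustrative pairing of drm_sched_suspend_timeout() and
 * drm_sched_resume_timeout(); the reason for suspending (e.g. the hardware
 * temporarily cannot make progress on this ring) is driver specific and
 * assumed here, as is the "ring" structure:
 *
 *	unsigned long remaining;
 *
 *	remaining = drm_sched_suspend_timeout(&ring->sched);
 *	// ... window during which no job timeout should fire ...
 *	drm_sched_resume_timeout(&ring->sched, remaining);
 */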
271
272 static void drm_sched_job_begin(struct drm_sched_job *s_job)
273 {
274         struct drm_gpu_scheduler *sched = s_job->sched;
275         unsigned long flags;
276
277         spin_lock_irqsave(&sched->job_list_lock, flags);
278         list_add_tail(&s_job->node, &sched->ring_mirror_list);
279         drm_sched_start_timeout(sched);
280         spin_unlock_irqrestore(&sched->job_list_lock, flags);
281 }
282
283 static void drm_sched_job_timedout(struct work_struct *work)
284 {
285         struct drm_gpu_scheduler *sched;
286         struct drm_sched_job *job;
287         unsigned long flags;
288
289         sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
290
291         /* Protects against concurrent deletion in drm_sched_get_cleanup_job */
292         spin_lock_irqsave(&sched->job_list_lock, flags);
293         job = list_first_entry_or_null(&sched->ring_mirror_list,
294                                        struct drm_sched_job, node);
295
296         if (job) {
297                 /*
298                  * Remove the bad job so it cannot be freed by a concurrent
299                  * drm_sched_get_cleanup_job. It will be reinserted after
300                  * sched->thread is parked, at which point it is safe.
301                  */
302                 list_del_init(&job->node);
303                 spin_unlock_irqrestore(&sched->job_list_lock, flags);
304
305                 job->sched->ops->timedout_job(job);
306
307                 /*
308                  * The guilty job did complete and hence needs to be freed
309                  * manually here; see the drm_sched_stop() documentation.
310                  */
311                 if (sched->free_guilty) {
312                         job->sched->ops->free_job(job);
313                         sched->free_guilty = false;
314                 }
315         } else {
316                 spin_unlock_irqrestore(&sched->job_list_lock, flags);
317         }
318
319         spin_lock_irqsave(&sched->job_list_lock, flags);
320         drm_sched_start_timeout(sched);
321         spin_unlock_irqrestore(&sched->job_list_lock, flags);
322 }
323
324 /**
325  * drm_sched_increase_karma - Update sched_entity guilty flag
326  *
327  * @bad: The job guilty of time out
328  *
329  * Increment the karma of the 'bad' job on every hang it causes. If this
330  * exceeds the hang limit of the scheduler, the respective sched entity is
331  * marked guilty and jobs from it will not be scheduled further.
332  */
333 void drm_sched_increase_karma(struct drm_sched_job *bad)
334 {
335         int i;
336         struct drm_sched_entity *tmp;
337         struct drm_sched_entity *entity;
338         struct drm_gpu_scheduler *sched = bad->sched;
339
340         /* don't increase @bad's karma if it's from the KERNEL RQ,
341          * because a GPU hang can sometimes corrupt kernel jobs (like VM
342          * updating jobs); kernel jobs are always considered good.
343          */
344         if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
345                 atomic_inc(&bad->karma);
346                 for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
347                      i++) {
348                         struct drm_sched_rq *rq = &sched->sched_rq[i];
349
350                         spin_lock(&rq->lock);
351                         list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
352                                 if (bad->s_fence->scheduled.context ==
353                                     entity->fence_context) {
354                                         if (atomic_read(&bad->karma) >
355                                             bad->sched->hang_limit)
356                                                 if (entity->guilty)
357                                                         atomic_set(entity->guilty, 1);
358                                         break;
359                                 }
360                         }
361                         spin_unlock(&rq->lock);
362                         if (&entity->list != &rq->entities)
363                                 break;
364                 }
365         }
366 }
367 EXPORT_SYMBOL(drm_sched_increase_karma);
368
369 /**
370  * drm_sched_stop - stop the scheduler
371  *
372  * @sched: scheduler instance
373  * @bad: job which caused the time out
374  *
375  * Stop the scheduler, and also remove and free all completed jobs.
376  * Note: the bad job will not be freed as it might be used later, so it is
377  * the caller's responsibility to release it manually if it is no longer
378  * part of the mirror list.
379  *
380  */
381 void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
382 {
383         struct drm_sched_job *s_job, *tmp;
384         unsigned long flags;
385
386         kthread_park(sched->thread);
387
388         /*
389          * Reinsert the bad job here - now it is safe, as
390          * drm_sched_get_cleanup_job cannot race against us and release the
391          * bad job at this point: we parked (waited for) any in-progress
392          * (earlier) cleanups, and drm_sched_get_cleanup_job will not be
393          * called again until the scheduler thread is unparked.
394          */
395         if (bad && bad->sched == sched)
396                 /*
397                  * Add at the head of the queue to reflect it was the earliest
398                  * job extracted.
399                  */
400                 list_add(&bad->node, &sched->ring_mirror_list);
401
402         /*
403          * Iterate the job list from later to earlier and either deactivate
404          * their HW callbacks or remove them from the mirror list if they have
405          * already signaled.
406          * This iteration is thread safe as the sched thread is stopped.
407          */
408         list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
409                 if (s_job->s_fence->parent &&
410                     dma_fence_remove_callback(s_job->s_fence->parent,
411                                               &s_job->cb)) {
412                         atomic_dec(&sched->hw_rq_count);
413                 } else {
414                         /*
415                          * Remove the job from ring_mirror_list. Locking here
416                          * protects against a concurrent drm_sched_resume_timeout().
417                          */
418                         spin_lock_irqsave(&sched->job_list_lock, flags);
419                         list_del_init(&s_job->node);
420                         spin_unlock_irqrestore(&sched->job_list_lock, flags);
421
422                         /*
423                          * Wait for job's HW fence callback to finish using s_job
424                          * before releasing it.
425                          *
426                          * The job is still alive, so the fence refcount is at least 1.
427                          */
428                         dma_fence_wait(&s_job->s_fence->finished, false);
429
430                         /*
431                          * We must keep the bad job alive for later use during
432                          * recovery by some drivers, but leave a hint that the
433                          * guilty job must be released.
434                          */
435                         if (bad != s_job)
436                                 sched->ops->free_job(s_job);
437                         else
438                                 sched->free_guilty = true;
439                 }
440         }
441
442         /*
443          * Stop the pending timer in flight as we rearm it in drm_sched_start.
444          * This prevents the pending timeout work in progress from firing right
445          * away after this TDR finishes, before the newly restarted jobs have
446          * had a chance to complete.
447          */
448         cancel_delayed_work(&sched->work_tdr);
449 }
451 EXPORT_SYMBOL(drm_sched_stop);
452
453 /**
454  * drm_sched_start - recover jobs after a reset
455  *
456  * @sched: scheduler instance
457  * @full_recovery: proceed with complete sched restart
458  *
459  */
460 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
461 {
462         struct drm_sched_job *s_job, *tmp;
463         unsigned long flags;
464         int r;
465
466         /*
467          * Locking the list is not required here as the sched thread is parked
468          * so no new jobs are being inserted or removed. Also concurrent
469          * GPU recovers can't run in parallel.
470          */
471         list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
472                 struct dma_fence *fence = s_job->s_fence->parent;
473
474                 atomic_inc(&sched->hw_rq_count);
475
476                 if (!full_recovery)
477                         continue;
478
479                 if (fence) {
480                         r = dma_fence_add_callback(fence, &s_job->cb,
481                                                    drm_sched_process_job);
482                         if (r == -ENOENT)
483                                 drm_sched_process_job(fence, &s_job->cb);
484                         else if (r)
485                                 DRM_ERROR("fence add callback failed (%d)\n",
486                                           r);
487                 } else
488                         drm_sched_process_job(NULL, &s_job->cb);
489         }
490
491         if (full_recovery) {
492                 spin_lock_irqsave(&sched->job_list_lock, flags);
493                 drm_sched_start_timeout(sched);
494                 spin_unlock_irqrestore(&sched->job_list_lock, flags);
495         }
496
497         kthread_unpark(sched->thread);
498 }
499 EXPORT_SYMBOL(drm_sched_start);
500
501 /**
502  * drm_sched_resubmit_jobs - helper to relaunch jobs from the mirror ring list
503  *
504  * @sched: scheduler instance
505  *
506  */
507 void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
508 {
509         struct drm_sched_job *s_job, *tmp;
510         uint64_t guilty_context;
511         bool found_guilty = false;
512         struct dma_fence *fence;
513
514         list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
515                 struct drm_sched_fence *s_fence = s_job->s_fence;
516
517                 if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
518                         found_guilty = true;
519                         guilty_context = s_job->s_fence->scheduled.context;
520                 }
521
522                 if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
523                         dma_fence_set_error(&s_fence->finished, -ECANCELED);
524
525                 dma_fence_put(s_job->s_fence->parent);
526                 fence = sched->ops->run_job(s_job);
527
528                 if (IS_ERR_OR_NULL(fence)) {
529                         if (IS_ERR(fence))
530                                 dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
531
532                         s_job->s_fence->parent = NULL;
533                 } else {
534                         s_job->s_fence->parent = fence;
535                 }
538         }
539 }
540 EXPORT_SYMBOL(drm_sched_resubmit_jobs);
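/*
 * A hedged sketch of how a driver's timedout_job callback could combine
 * drm_sched_stop(), drm_sched_increase_karma(), drm_sched_resubmit_jobs()
 * and drm_sched_start() into a recovery sequence. my_reset_hw() and the
 * overall structure are assumptions, not dictated by this API:
 *
 *	static void my_timedout_job(struct drm_sched_job *bad)
 *	{
 *		struct drm_gpu_scheduler *sched = bad->sched;
 *
 *		drm_sched_stop(sched, bad);
 *		drm_sched_increase_karma(bad);
 *		my_reset_hw();
 *		drm_sched_resubmit_jobs(sched);
 *		drm_sched_start(sched, true);
 *	}
 */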
541
542 /**
543  * drm_sched_job_init - init a scheduler job
544  *
545  * @job: scheduler job to init
546  * @entity: scheduler entity to use
547  * @owner: job owner for debugging
548  *
549  * Refer to drm_sched_entity_push_job() documentation
550  * for locking considerations.
551  *
552  * Returns 0 for success, negative error code otherwise.
553  */
554 int drm_sched_job_init(struct drm_sched_job *job,
555                        struct drm_sched_entity *entity,
556                        void *owner)
557 {
558         struct drm_gpu_scheduler *sched;
559
560         drm_sched_entity_select_rq(entity);
561         if (!entity->rq)
562                 return -ENOENT;
563
564         sched = entity->rq->sched;
565
566         job->sched = sched;
567         job->entity = entity;
568         job->s_priority = entity->rq - sched->sched_rq;
569         job->s_fence = drm_sched_fence_create(entity, owner);
570         if (!job->s_fence)
571                 return -ENOMEM;
572         job->id = atomic64_inc_return(&sched->job_id_count);
573
574         INIT_LIST_HEAD(&job->node);
575
576         return 0;
577 }
578 EXPORT_SYMBOL(drm_sched_job_init);
579
580 /**
581  * drm_sched_job_cleanup - clean up scheduler job resources
582  *
583  * @job: scheduler job to clean up
584  */
585 void drm_sched_job_cleanup(struct drm_sched_job *job)
586 {
587         dma_fence_put(&job->s_fence->finished);
588         job->s_fence = NULL;
589 }
590 EXPORT_SYMBOL(drm_sched_job_cleanup);
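/*
 * Illustrative error handling around job setup; job->base, entity, owner and
 * my_prepare_job() are made-up driver names. A common pattern is to release
 * the scheduler fence with drm_sched_job_cleanup() if setup fails after
 * drm_sched_job_init() but before the job is pushed to its entity:
 *
 *	r = drm_sched_job_init(&job->base, entity, owner);
 *	if (r)
 *		return r;
 *
 *	r = my_prepare_job(job);
 *	if (r) {
 *		drm_sched_job_cleanup(&job->base);
 *		return r;
 *	}
 *
 *	drm_sched_entity_push_job(&job->base, entity);
 */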
591
592 /**
593  * drm_sched_ready - is the scheduler ready
594  *
595  * @sched: scheduler instance
596  *
597  * Return true if we can push more jobs to the hw, otherwise false.
598  */
599 static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
600 {
601         return atomic_read(&sched->hw_rq_count) <
602                 sched->hw_submission_limit;
603 }
604
605 /**
606  * drm_sched_wakeup - Wake up the scheduler when it is ready
607  *
608  * @sched: scheduler instance
609  *
610  */
611 void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
612 {
613         if (drm_sched_ready(sched))
614                 wake_up_interruptible(&sched->wake_up_worker);
615 }
616
617 /**
618  * drm_sched_select_entity - Select next entity to process
619  *
620  * @sched: scheduler instance
621  *
622  * Returns the entity to process or NULL if none are found.
623  */
624 static struct drm_sched_entity *
625 drm_sched_select_entity(struct drm_gpu_scheduler *sched)
626 {
627         struct drm_sched_entity *entity;
628         int i;
629
630         if (!drm_sched_ready(sched))
631                 return NULL;
632
633         /* Kernel run queue has higher priority than normal run queue */
634         for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
635                 entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
636                 if (entity)
637                         break;
638         }
639
640         return entity;
641 }
642
643 /**
644  * drm_sched_process_job - process a job
645  *
646  * @f: fence
647  * @cb: fence callbacks
648  *
649  * Called after job has finished execution.
650  */
651 static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
652 {
653         struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
654         struct drm_sched_fence *s_fence = s_job->s_fence;
655         struct drm_gpu_scheduler *sched = s_fence->sched;
656
657         atomic_dec(&sched->hw_rq_count);
658         atomic_dec(&sched->num_jobs);
659
660         trace_drm_sched_process_job(s_fence);
661
662         drm_sched_fence_finished(s_fence);
663         wake_up_interruptible(&sched->wake_up_worker);
664 }
665
666 /**
667  * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
668  *
669  * @sched: scheduler instance
670  *
671  * Returns the next finished job from the mirror list (if there is one),
672  * ready to be destroyed.
673  */
674 static struct drm_sched_job *
675 drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
676 {
677         struct drm_sched_job *job;
678         unsigned long flags;
679
680         /*
681          * Don't destroy jobs while the timeout worker is running OR the thread
682          * is being parked and hence assumed not to touch ring_mirror_list.
683          */
684         if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
685             !cancel_delayed_work(&sched->work_tdr)) ||
686             __kthread_should_park(sched->thread))
687                 return NULL;
688
689         spin_lock_irqsave(&sched->job_list_lock, flags);
690
691         job = list_first_entry_or_null(&sched->ring_mirror_list,
692                                        struct drm_sched_job, node);
693
694         if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
695                 /* remove job from ring_mirror_list */
696                 list_del_init(&job->node);
697         } else {
698                 job = NULL;
699                 /* queue timeout for next job */
700                 drm_sched_start_timeout(sched);
701         }
702
703         spin_unlock_irqrestore(&sched->job_list_lock, flags);
704
705         return job;
706 }
707
708 /**
709  * drm_sched_blocked - check if the scheduler is blocked
710  *
711  * @sched: scheduler instance
712  *
713  * Returns true if blocked, otherwise false.
714  */
715 static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
716 {
717         if (kthread_should_park()) {
718                 kthread_parkme();
719                 return true;
720         }
721
722         return false;
723 }
724
725 /**
726  * drm_sched_main - main scheduler thread
727  *
728  * @param: scheduler instance
729  *
730  * Returns 0.
731  */
732 static int drm_sched_main(void *param)
733 {
734         struct sched_param sparam = {.sched_priority = 1};
735         struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
736         int r;
737
738         sched_setscheduler(current, SCHED_FIFO, &sparam);
739
740         while (!kthread_should_stop()) {
741                 struct drm_sched_entity *entity = NULL;
742                 struct drm_sched_fence *s_fence;
743                 struct drm_sched_job *sched_job;
744                 struct dma_fence *fence;
745                 struct drm_sched_job *cleanup_job = NULL;
746
747                 wait_event_interruptible(sched->wake_up_worker,
748                                          (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
749                                          (!drm_sched_blocked(sched) &&
750                                           (entity = drm_sched_select_entity(sched))) ||
751                                          kthread_should_stop());
752
753                 if (cleanup_job) {
754                         sched->ops->free_job(cleanup_job);
755                         /* queue timeout for next job */
756                         drm_sched_start_timeout(sched);
757                 }
758
759                 if (!entity)
760                         continue;
761
762                 sched_job = drm_sched_entity_pop_job(entity);
763
764                 complete(&entity->entity_idle);
765
766                 if (!sched_job)
767                         continue;
768
769                 s_fence = sched_job->s_fence;
770
771                 atomic_inc(&sched->hw_rq_count);
772                 drm_sched_job_begin(sched_job);
773
774                 fence = sched->ops->run_job(sched_job);
775                 drm_sched_fence_scheduled(s_fence);
776
777                 if (!IS_ERR_OR_NULL(fence)) {
778                         s_fence->parent = dma_fence_get(fence);
779                         r = dma_fence_add_callback(fence, &sched_job->cb,
780                                                    drm_sched_process_job);
781                         if (r == -ENOENT)
782                                 drm_sched_process_job(fence, &sched_job->cb);
783                         else if (r)
784                                 DRM_ERROR("fence add callback failed (%d)\n",
785                                           r);
786                         dma_fence_put(fence);
787                 } else {
788                         if (IS_ERR(fence))
789                                 dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
790
791                         drm_sched_process_job(NULL, &sched_job->cb);
792                 }
793
794                 wake_up(&sched->job_scheduled);
795         }
796         return 0;
797 }
798
799 /**
800  * drm_sched_init - Init a gpu scheduler instance
801  *
802  * @sched: scheduler instance
803  * @ops: backend operations for this scheduler
804  * @hw_submission: number of hw submissions that can be in flight
805  * @hang_limit: number of times to allow a job to hang before dropping it
806  * @timeout: timeout value in jiffies for the scheduler
807  * @name: name used for debugging
808  *
809  * Return 0 on success, otherwise error code.
810  */
811 int drm_sched_init(struct drm_gpu_scheduler *sched,
812                    const struct drm_sched_backend_ops *ops,
813                    unsigned hw_submission,
814                    unsigned hang_limit,
815                    long timeout,
816                    const char *name)
817 {
818         int i, ret;
819         sched->ops = ops;
820         sched->hw_submission_limit = hw_submission;
821         sched->name = name;
822         sched->timeout = timeout;
823         sched->hang_limit = hang_limit;
824         for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
825                 drm_sched_rq_init(sched, &sched->sched_rq[i]);
826
827         init_waitqueue_head(&sched->wake_up_worker);
828         init_waitqueue_head(&sched->job_scheduled);
829         INIT_LIST_HEAD(&sched->ring_mirror_list);
830         spin_lock_init(&sched->job_list_lock);
831         atomic_set(&sched->hw_rq_count, 0);
832         INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
833         atomic_set(&sched->num_jobs, 0);
834         atomic64_set(&sched->job_id_count, 0);
835
836         /* Each scheduler will run on a separate kernel thread */
837         sched->thread = kthread_run(drm_sched_main, sched, sched->name);
838         if (IS_ERR(sched->thread)) {
839                 ret = PTR_ERR(sched->thread);
840                 sched->thread = NULL;
841                 DRM_ERROR("Failed to create scheduler for %s.\n", name);
842                 return ret;
843         }
844
845         sched->ready = true;
846         return 0;
847 }
848 EXPORT_SYMBOL(drm_sched_init);
849
850 /**
851  * drm_sched_fini - Destroy a gpu scheduler
852  *
853  * @sched: scheduler instance
854  *
855  * Tears down and cleans up the scheduler.
856  */
857 void drm_sched_fini(struct drm_gpu_scheduler *sched)
858 {
859         if (sched->thread)
860                 kthread_stop(sched->thread);
861
862         sched->ready = false;
863 }
864 EXPORT_SYMBOL(drm_sched_fini);