drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

   1 /*
   2  * Copyright 2009 Jerome Glisse.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20  *
  21  * The above copyright notice and this permission notice (including the
  22  * next paragraph) shall be included in all copies or substantial portions
  23  * of the Software.
  24  *
  25  */
  26 /*
  27  * Authors:
  28  *    Jerome Glisse <glisse@freedesktop.org>
  29  *    Dave Airlie
  30  */
  31 #include <linux/seq_file.h>
  32 #include <linux/atomic.h>
  33 #include <linux/wait.h>
  34 #include <linux/kref.h>
  35 #include <linux/slab.h>
  36 #include <linux/firmware.h>
  37 #include <linux/pm_runtime.h>
  38
  39 #include <drm/drm_drv.h>
  40 #include "amdgpu.h"
  41 #include "amdgpu_trace.h"
  42 #include "amdgpu_reset.h"
  43
/*
 * Fences
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization.  When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed.
 */
  52
  53 struct amdgpu_fence {
  54         struct dma_fence base;
  55
  56         /* RB, DMA, etc. */
  57         struct amdgpu_ring              *ring;
  58         ktime_t                         start_timestamp;
  59 };
  60
  61 static struct kmem_cache *amdgpu_fence_slab;
  62
  63 int amdgpu_fence_slab_init(void)
  64 {
  65         amdgpu_fence_slab = kmem_cache_create(
  66                 "amdgpu_fence", sizeof(struct amdgpu_fence), 0,
  67                 SLAB_HWCACHE_ALIGN, NULL);
  68         if (!amdgpu_fence_slab)
  69                 return -ENOMEM;
  70         return 0;
  71 }
  72
  73 void amdgpu_fence_slab_fini(void)
  74 {
  75         rcu_barrier();
  76         kmem_cache_destroy(amdgpu_fence_slab);
  77 }
  78 /*
  79  * Cast helper
  80  */
  81 static const struct dma_fence_ops amdgpu_fence_ops;
  82 static const struct dma_fence_ops amdgpu_job_fence_ops;
  83 static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
  84 {
  85         struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
  86
  87         if (__f->base.ops == &amdgpu_fence_ops ||
  88             __f->base.ops == &amdgpu_job_fence_ops)
  89                 return __f;
  90
  91         return NULL;
  92 }
  93
  94 /**
  95  * amdgpu_fence_write - write a fence value
  96  *
  97  * @ring: ring the fence is associated with
  98  * @seq: sequence number to write
  99  *
 100  * Writes a fence value to memory (all asics).
 101  */
 102 static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
 103 {
 104         struct amdgpu_fence_driver *drv = &ring->fence_drv;
 105
 106         if (drv->cpu_addr)
 107                 *drv->cpu_addr = cpu_to_le32(seq);
 108 }
 109
 110 /**
 111  * amdgpu_fence_read - read a fence value
 112  *
 113  * @ring: ring the fence is associated with
 114  *
 115  * Reads a fence value from memory (all asics).
 116  * Returns the value of the fence read from memory.
 117  */
 118 static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
 119 {
 120         struct amdgpu_fence_driver *drv = &ring->fence_drv;
 121         u32 seq = 0;
 122
 123         if (drv->cpu_addr)
 124                 seq = le32_to_cpu(*drv->cpu_addr);
 125         else
 126                 seq = atomic_read(&drv->last_seq);
 127
 128         return seq;
 129 }
 130
 131 /**
 132  * amdgpu_fence_emit - emit a fence on the requested ring
 133  *
 134  * @ring: ring the fence is associated with
 135  * @f: resulting fence object
 136  * @job: job the fence is embedded in
 137  * @flags: flags to pass into the subordinate .emit_fence() call
 138  *
 139  * Emits a fence command on the requested ring (all asics).
 140  * Returns 0 on success, -ENOMEM on failure.
 141  */
 142 int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job,
 143                       unsigned flags)
 144 {
 145         struct amdgpu_device *adev = ring->adev;
 146         struct dma_fence *fence;
 147         struct amdgpu_fence *am_fence;
 148         struct dma_fence __rcu **ptr;
 149         uint32_t seq;
 150         int r;
 151
 152         if (job == NULL) {
 153                 /* create a sperate hw fence */
 154                 am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC);
 155                 if (am_fence == NULL)
 156                         return -ENOMEM;
 157                 fence = &am_fence->base;
 158                 am_fence->ring = ring;
 159         } else {
 160                 /* take use of job-embedded fence */
 161                 fence = &job->hw_fence;
 162         }
 163
 164         seq = ++ring->fence_drv.sync_seq;
 165         if (job && job->job_run_counter) {
 166                 /* reinit seq for resubmitted jobs */
 167                 fence->seqno = seq;
 168                 /* TO be inline with external fence creation and other drivers */
 169                 dma_fence_get(fence);
 170         } else {
 171                 if (job) {
 172                         dma_fence_init(fence, &amdgpu_job_fence_ops,
 173                                        &ring->fence_drv.lock,
 174                                        adev->fence_context + ring->idx, seq);
 175                         /* Against remove in amdgpu_job_{free, free_cb} */
 176                         dma_fence_get(fence);
 177                 }
 178                 else
 179                         dma_fence_init(fence, &amdgpu_fence_ops,
 180                                        &ring->fence_drv.lock,
 181                                        adev->fence_context + ring->idx, seq);
 182         }
 183
 184         amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 185                                seq, flags | AMDGPU_FENCE_FLAG_INT);
 186         pm_runtime_get_noresume(adev_to_drm(adev)->dev);
 187         ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 188         if (unlikely(rcu_dereference_protected(*ptr, 1))) {
 189                 struct dma_fence *old;
 190
 191                 rcu_read_lock();
 192                 old = dma_fence_get_rcu_safe(ptr);
 193                 rcu_read_unlock();
 194
 195                 if (old) {
 196                         r = dma_fence_wait(old, false);
 197                         dma_fence_put(old);
 198                         if (r)
 199                                 return r;
 200                 }
 201         }
 202
 203         to_amdgpu_fence(fence)->start_timestamp = ktime_get();
 204
 205         /* This function can't be called concurrently anyway, otherwise
 206          * emitting the fence would mess up the hardware ring buffer.
 207          */
 208         rcu_assign_pointer(*ptr, dma_fence_get(fence));
 209
 210         *f = fence;
 211
 212         return 0;
 213 }
 214
 215 /**
 216  * amdgpu_fence_emit_polling - emit a fence on the requeste ring
 217  *
 218  * @ring: ring the fence is associated with
 219  * @s: resulting sequence number
 220  * @timeout: the timeout for waiting in usecs
 221  *
 222  * Emits a fence command on the requested ring (all asics).
 223  * Used For polling fence.
 224  * Returns 0 on success, -ENOMEM on failure.
 225  */
 226 int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
 227                               uint32_t timeout)
 228 {
 229         uint32_t seq;
 230         signed long r;
 231
 232         if (!s)
 233                 return -EINVAL;
 234
 235         seq = ++ring->fence_drv.sync_seq;
 236         r = amdgpu_fence_wait_polling(ring,
 237                                       seq - ring->fence_drv.num_fences_mask,
 238                                       timeout);
 239         if (r < 1)
 240                 return -ETIMEDOUT;
 241
 242         amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 243                                seq, 0);
 244
 245         *s = seq;
 246
 247         return 0;
 248 }
 249
 250 /**
 251  * amdgpu_fence_schedule_fallback - schedule fallback check
 252  *
 253  * @ring: pointer to struct amdgpu_ring
 254  *
 255  * Start a timer as fallback to our interrupts.
 256  */
 257 static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
 258 {
 259         mod_timer(&ring->fence_drv.fallback_timer,
 260                   jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
 261 }
 262
 263 /**
 264  * amdgpu_fence_process - check for fence activity
 265  *
 266  * @ring: pointer to struct amdgpu_ring
 267  *
 268  * Checks the current fence value and calculates the last
 269  * signalled fence value. Wakes the fence queue if the
 270  * sequence number has increased.
 271  *
 272  * Returns true if fence was processed
 273  */
 274 bool amdgpu_fence_process(struct amdgpu_ring *ring)
 275 {
 276         struct amdgpu_fence_driver *drv = &ring->fence_drv;
 277         struct amdgpu_device *adev = ring->adev;
 278         uint32_t seq, last_seq;
 279
 280         do {
 281                 last_seq = atomic_read(&ring->fence_drv.last_seq);
 282                 seq = amdgpu_fence_read(ring);
 283
 284         } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
 285
 286         if (del_timer(&ring->fence_drv.fallback_timer) &&
 287             seq != ring->fence_drv.sync_seq)
 288                 amdgpu_fence_schedule_fallback(ring);
 289
 290         if (unlikely(seq == last_seq))
 291                 return false;
 292
 293         last_seq &= drv->num_fences_mask;
 294         seq &= drv->num_fences_mask;
 295
 296         do {
 297                 struct dma_fence *fence, **ptr;
 298
 299                 ++last_seq;
 300                 last_seq &= drv->num_fences_mask;
 301                 ptr = &drv->fences[last_seq];
 302
 303                 /* There is always exactly one thread signaling this fence slot */
 304                 fence = rcu_dereference_protected(*ptr, 1);
 305                 RCU_INIT_POINTER(*ptr, NULL);
 306
 307                 if (!fence)
 308                         continue;
 309
 310                 dma_fence_signal(fence);
 311                 dma_fence_put(fence);
 312                 pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 313                 pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
 314         } while (last_seq != seq);
 315
 316         return true;
 317 }
 318
 319 /**
 320  * amdgpu_fence_fallback - fallback for hardware interrupts
 321  *
 322  * @t: timer context used to obtain the pointer to ring structure
 323  *
 324  * Checks for fence activity.
 325  */
 326 static void amdgpu_fence_fallback(struct timer_list *t)
 327 {
 328         struct amdgpu_ring *ring = from_timer(ring, t,
 329                                               fence_drv.fallback_timer);
 330
 331         if (amdgpu_fence_process(ring))
 332                 DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
 333 }
 334
 335 /**
 336  * amdgpu_fence_wait_empty - wait for all fences to signal
 337  *
 338  * @ring: ring index the fence is associated with
 339  *
 340  * Wait for all fences on the requested ring to signal (all asics).
 341  * Returns 0 if the fences have passed, error for all other cases.
 342  */
 343 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
 344 {
 345         uint64_t seq = READ_ONCE(ring->fence_drv.sync_seq);
 346         struct dma_fence *fence, **ptr;
 347         int r;
 348
 349         if (!seq)
 350                 return 0;
 351
 352         ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 353         rcu_read_lock();
 354         fence = rcu_dereference(*ptr);
 355         if (!fence || !dma_fence_get_rcu(fence)) {
 356                 rcu_read_unlock();
 357                 return 0;
 358         }
 359         rcu_read_unlock();
 360
 361         r = dma_fence_wait(fence, false);
 362         dma_fence_put(fence);
 363         return r;
 364 }
 365
 366 /**
 367  * amdgpu_fence_wait_polling - busy wait for givn sequence number
 368  *
 369  * @ring: ring index the fence is associated with
 370  * @wait_seq: sequence number to wait
 371  * @timeout: the timeout for waiting in usecs
 372  *
 373  * Wait for all fences on the requested ring to signal (all asics).
 374  * Returns left time if no timeout, 0 or minus if timeout.
 375  */
 376 signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
 377                                       uint32_t wait_seq,
 378                                       signed long timeout)
 379 {
 380         uint32_t seq;
 381
 382         do {
 383                 seq = amdgpu_fence_read(ring);
 384                 udelay(5);
 385                 timeout -= 5;
 386         } while ((int32_t)(wait_seq - seq) > 0 && timeout > 0);
 387
 388         return timeout > 0 ? timeout : 0;
 389 }
 390 /**
 391  * amdgpu_fence_count_emitted - get the count of emitted fences
 392  *
 393  * @ring: ring the fence is associated with
 394  *
 395  * Get the number of fences emitted on the requested ring (all asics).
 396  * Returns the number of emitted fences on the ring.  Used by the
 397  * dynpm code to ring track activity.
 398  */
 399 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
 400 {
 401         uint64_t emitted;
 402
 403         /* We are not protected by ring lock when reading the last sequence
 404          * but it's ok to report slightly wrong fence count here.
 405          */
 406         emitted = 0x100000000ull;
 407         emitted -= atomic_read(&ring->fence_drv.last_seq);
 408         emitted += READ_ONCE(ring->fence_drv.sync_seq);
 409         return lower_32_bits(emitted);
 410 }
 411
 412 /**
 413  * amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now
 414  * @ring: ring the fence is associated with
 415  *
 416  * Find the earliest fence unsignaled until now, calculate the time delta
 417  * between the time fence emitted and now.
 418  */
 419 u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
 420 {
 421         struct amdgpu_fence_driver *drv = &ring->fence_drv;
 422         struct dma_fence *fence;
 423         uint32_t last_seq, sync_seq;
 424
 425         last_seq = atomic_read(&ring->fence_drv.last_seq);
 426         sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
 427         if (last_seq == sync_seq)
 428                 return 0;
 429
 430         ++last_seq;
 431         last_seq &= drv->num_fences_mask;
 432         fence = drv->fences[last_seq];
 433         if (!fence)
 434                 return 0;
 435
 436         return ktime_us_delta(ktime_get(),
 437                 to_amdgpu_fence(fence)->start_timestamp);
 438 }
 439
 440 /**
 441  * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
 442  * @ring: ring the fence is associated with
 443  * @seq: the fence seq number to update.
 444  * @timestamp: the start timestamp to update.
 445  *
 446  * The function called at the time the fence and related ib is about to
 447  * resubmit to gpu in MCBP scenario. Thus we do not consider race condition
 448  * with amdgpu_fence_process to modify the same fence.
 449  */
 450 void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
 451 {
 452         struct amdgpu_fence_driver *drv = &ring->fence_drv;
 453         struct dma_fence *fence;
 454
 455         seq &= drv->num_fences_mask;
 456         fence = drv->fences[seq];
 457         if (!fence)
 458                 return;
 459
 460         to_amdgpu_fence(fence)->start_timestamp = timestamp;
 461 }
 462
 463 /**
 464  * amdgpu_fence_driver_start_ring - make the fence driver
 465  * ready for use on the requested ring.
 466  *
 467  * @ring: ring to start the fence driver on
 468  * @irq_src: interrupt source to use for this ring
 469  * @irq_type: interrupt type to use for this ring
 470  *
 471  * Make the fence driver ready for processing (all asics).
 472  * Not all asics have all rings, so each asic will only
 473  * start the fence driver on the rings it has.
 474  * Returns 0 for success, errors for failure.
 475  */
 476 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 477                                    struct amdgpu_irq_src *irq_src,
 478                                    unsigned irq_type)
 479 {
 480         struct amdgpu_device *adev = ring->adev;
 481         uint64_t index;
 482
 483         if (ring->funcs->type != AMDGPU_RING_TYPE_UVD) {
 484                 ring->fence_drv.cpu_addr = ring->fence_cpu_addr;
 485                 ring->fence_drv.gpu_addr = ring->fence_gpu_addr;
 486         } else {
 487                 /* put fence directly behind firmware */
 488                 index = ALIGN(adev->uvd.fw->size, 8);
 489                 ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
 490                 ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
 491         }
 492         amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
 493
 494         ring->fence_drv.irq_src = irq_src;
 495         ring->fence_drv.irq_type = irq_type;
 496         ring->fence_drv.initialized = true;
 497
 498         DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr 0x%016llx\n",
 499                       ring->name, ring->fence_drv.gpu_addr);
 500         return 0;
 501 }
 502
 503 /**
 504  * amdgpu_fence_driver_init_ring - init the fence driver
 505  * for the requested ring.
 506  *
 507  * @ring: ring to init the fence driver on
 508  *
 509  * Init the fence driver for the requested ring (all asics).
 510  * Helper function for amdgpu_fence_driver_init().
 511  */
 512 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
 513 {
 514         struct amdgpu_device *adev = ring->adev;
 515
 516         if (!adev)
 517                 return -EINVAL;
 518
 519         if (!is_power_of_2(ring->num_hw_submission))
 520                 return -EINVAL;
 521
 522         ring->fence_drv.cpu_addr = NULL;
 523         ring->fence_drv.gpu_addr = 0;
 524         ring->fence_drv.sync_seq = 0;
 525         atomic_set(&ring->fence_drv.last_seq, 0);
 526         ring->fence_drv.initialized = false;
 527
 528         timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);
 529
 530         ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1;
 531         spin_lock_init(&ring->fence_drv.lock);
 532         ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *),
 533                                          GFP_KERNEL);
 534
 535         if (!ring->fence_drv.fences)
 536                 return -ENOMEM;
 537
 538         return 0;
 539 }
 540
 541 /**
 542  * amdgpu_fence_driver_sw_init - init the fence driver
 543  * for all possible rings.
 544  *
 545  * @adev: amdgpu device pointer
 546  *
 547  * Init the fence driver for all possible rings (all asics).
 548  * Not all asics have all rings, so each asic will only
 549  * start the fence driver on the rings it has using
 550  * amdgpu_fence_driver_start_ring().
 551  * Returns 0 for success.
 552  */
 553 int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev)
 554 {
 555         return 0;
 556 }
 557
 558 /**
 559  * amdgpu_fence_driver_hw_fini - tear down the fence driver
 560  * for all possible rings.
 561  *
 562  * @adev: amdgpu device pointer
 563  *
 564  * Tear down the fence driver for all possible rings (all asics).
 565  */
 566 void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
 567 {
 568         int i, r;
 569
 570         for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 571                 struct amdgpu_ring *ring = adev->rings[i];
 572
 573                 if (!ring || !ring->fence_drv.initialized)
 574                         continue;
 575
 576                 /* You can't wait for HW to signal if it's gone */
 577                 if (!drm_dev_is_unplugged(adev_to_drm(adev)))
 578                         r = amdgpu_fence_wait_empty(ring);
 579                 else
 580                         r = -ENODEV;
 581                 /* no need to trigger GPU reset as we are unloading */
 582                 if (r)
 583                         amdgpu_fence_driver_force_completion(ring);
 584
 585                 if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
 586                     ring->fence_drv.irq_src)
 587                         amdgpu_irq_put(adev, ring->fence_drv.irq_src,
 588                                        ring->fence_drv.irq_type);
 589
 590                 del_timer_sync(&ring->fence_drv.fallback_timer);
 591         }
 592 }
 593
 594 /* Will either stop and flush handlers for amdgpu interrupt or reanble it */
 595 void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop)
 596 {
 597         int i;
 598
 599         for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 600                 struct amdgpu_ring *ring = adev->rings[i];
 601
 602                 if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src)
 603                         continue;
 604
 605                 if (stop)
 606                         disable_irq(adev->irq.irq);
 607                 else
 608                         enable_irq(adev->irq.irq);
 609         }
 610 }
 611
 612 void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
 613 {
 614         unsigned int i, j;
 615
 616         for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 617                 struct amdgpu_ring *ring = adev->rings[i];
 618
 619                 if (!ring || !ring->fence_drv.initialized)
 620                         continue;
 621
 622                 /*
 623                  * Notice we check for sched.ops since there's some
 624                  * override on the meaning of sched.ready by amdgpu.
 625                  * The natural check would be sched.ready, which is
 626                  * set as drm_sched_init() finishes...
 627                  */
 628                 if (ring->sched.ops)
 629                         drm_sched_fini(&ring->sched);
 630
 631                 for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
 632                         dma_fence_put(ring->fence_drv.fences[j]);
 633                 kfree(ring->fence_drv.fences);
 634                 ring->fence_drv.fences = NULL;
 635                 ring->fence_drv.initialized = false;
 636         }
 637 }
 638
 639 /**
 640  * amdgpu_fence_driver_hw_init - enable the fence driver
 641  * for all possible rings.
 642  *
 643  * @adev: amdgpu device pointer
 644  *
 645  * Enable the fence driver for all possible rings (all asics).
 646  * Not all asics have all rings, so each asic will only
 647  * start the fence driver on the rings it has using
 648  * amdgpu_fence_driver_start_ring().
 649  * Returns 0 for success.
 650  */
 651 void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
 652 {
 653         int i;
 654
 655         for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 656                 struct amdgpu_ring *ring = adev->rings[i];
 657                 if (!ring || !ring->fence_drv.initialized)
 658                         continue;
 659
 660                 /* enable the interrupt */
 661                 if (ring->fence_drv.irq_src)
 662                         amdgpu_irq_get(adev, ring->fence_drv.irq_src,
 663                                        ring->fence_drv.irq_type);
 664         }
 665 }
 666
 667 /**
 668  * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring
 669  *
 670  * @ring: fence of the ring to be cleared
 671  *
 672  */
 673 void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
 674 {
 675         int i;
 676         struct dma_fence *old, **ptr;
 677
 678         for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
 679                 ptr = &ring->fence_drv.fences[i];
 680                 old = rcu_dereference_protected(*ptr, 1);
 681                 if (old && old->ops == &amdgpu_job_fence_ops) {
 682                         struct amdgpu_job *job;
 683
 684                         /* For non-scheduler bad job, i.e. failed ib test, we need to signal
 685                          * it right here or we won't be able to track them in fence_drv
 686                          * and they will remain unsignaled during sa_bo free.
 687                          */
 688                         job = container_of(old, struct amdgpu_job, hw_fence);
 689                         if (!job->base.s_fence && !dma_fence_is_signaled(old))
 690                                 dma_fence_signal(old);
 691                         RCU_INIT_POINTER(*ptr, NULL);
 692                         dma_fence_put(old);
 693                 }
 694         }
 695 }
 696
 697 /**
 698  * amdgpu_fence_driver_force_completion - force signal latest fence of ring
 699  *
 700  * @ring: fence of the ring to signal
 701  *
 702  */
 703 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
 704 {
 705         amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
 706         amdgpu_fence_process(ring);
 707 }
 708
 709 /*
 710  * Common fence implementation
 711  */
 712
 713 static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)
 714 {
 715         return "amdgpu";
 716 }
 717
 718 static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
 719 {
 720         return (const char *)to_amdgpu_fence(f)->ring->name;
 721 }
 722
 723 static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
 724 {
 725         struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
 726
 727         return (const char *)to_amdgpu_ring(job->base.sched)->name;
 728 }
 729
 730 /**
 731  * amdgpu_fence_enable_signaling - enable signalling on fence
 732  * @f: fence
 733  *
 734  * This function is called with fence_queue lock held, and adds a callback
 735  * to fence_queue that checks if this fence is signaled, and if so it
 736  * signals the fence and removes itself.
 737  */
 738 static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
 739 {
 740         if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer))
 741                 amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring);
 742
 743         return true;
 744 }
 745
 746 /**
 747  * amdgpu_job_fence_enable_signaling - enable signalling on job fence
 748  * @f: fence
 749  *
 750  * This is the simliar function with amdgpu_fence_enable_signaling above, it
 751  * only handles the job embedded fence.
 752  */
 753 static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
 754 {
 755         struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
 756
 757         if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
 758                 amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
 759
 760         return true;
 761 }
 762
 763 /**
 764  * amdgpu_fence_free - free up the fence memory
 765  *
 766  * @rcu: RCU callback head
 767  *
 768  * Free up the fence memory after the RCU grace period.
 769  */
 770 static void amdgpu_fence_free(struct rcu_head *rcu)
 771 {
 772         struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
 773
 774         /* free fence_slab if it's separated fence*/
 775         kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f));
 776 }
 777
 778 /**
 779  * amdgpu_job_fence_free - free up the job with embedded fence
 780  *
 781  * @rcu: RCU callback head
 782  *
 783  * Free up the job with embedded fence after the RCU grace period.
 784  */
 785 static void amdgpu_job_fence_free(struct rcu_head *rcu)
 786 {
 787         struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
 788
 789         /* free job if fence has a parent job */
 790         kfree(container_of(f, struct amdgpu_job, hw_fence));
 791 }
 792
 793 /**
 794  * amdgpu_fence_release - callback that fence can be freed
 795  *
 796  * @f: fence
 797  *
 798  * This function is called when the reference count becomes zero.
 799  * It just RCU schedules freeing up the fence.
 800  */
 801 static void amdgpu_fence_release(struct dma_fence *f)
 802 {
 803         call_rcu(&f->rcu, amdgpu_fence_free);
 804 }
 805
 806 /**
 807  * amdgpu_job_fence_release - callback that job embedded fence can be freed
 808  *
 809  * @f: fence
 810  *
 811  * This is the simliar function with amdgpu_fence_release above, it
 812  * only handles the job embedded fence.
 813  */
 814 static void amdgpu_job_fence_release(struct dma_fence *f)
 815 {
 816         call_rcu(&f->rcu, amdgpu_job_fence_free);
 817 }
 818
 819 static const struct dma_fence_ops amdgpu_fence_ops = {
 820         .get_driver_name = amdgpu_fence_get_driver_name,
 821         .get_timeline_name = amdgpu_fence_get_timeline_name,
 822         .enable_signaling = amdgpu_fence_enable_signaling,
 823         .release = amdgpu_fence_release,
 824 };
 825
 826 static const struct dma_fence_ops amdgpu_job_fence_ops = {
 827         .get_driver_name = amdgpu_fence_get_driver_name,
 828         .get_timeline_name = amdgpu_job_fence_get_timeline_name,
 829         .enable_signaling = amdgpu_job_fence_enable_signaling,
 830         .release = amdgpu_job_fence_release,
 831 };
 832
 833 /*
 834  * Fence debugfs
 835  */
 836 #if defined(CONFIG_DEBUG_FS)
 837 static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
 838 {
 839         struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
 840         int i;
 841
 842         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 843                 struct amdgpu_ring *ring = adev->rings[i];
 844                 if (!ring || !ring->fence_drv.initialized)
 845                         continue;
 846
 847                 amdgpu_fence_process(ring);
 848
 849                 seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
 850                 seq_printf(m, "Last signaled fence          0x%08x\n",
 851                            atomic_read(&ring->fence_drv.last_seq));
 852                 seq_printf(m, "Last emitted                 0x%08x\n",
 853                            ring->fence_drv.sync_seq);
 854
 855                 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
 856                     ring->funcs->type == AMDGPU_RING_TYPE_SDMA) {
 857                         seq_printf(m, "Last signaled trailing fence 0x%08x\n",
 858                                    le32_to_cpu(*ring->trail_fence_cpu_addr));
 859                         seq_printf(m, "Last emitted                 0x%08x\n",
 860                                    ring->trail_seq);
 861                 }
 862
 863                 if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
 864                         continue;
 865
 866                 /* set in CP_VMID_PREEMPT and preemption occurred */
 867                 seq_printf(m, "Last preempted               0x%08x\n",
 868                            le32_to_cpu(*(ring->fence_drv.cpu_addr + 2)));
 869                 /* set in CP_VMID_RESET and reset occurred */
 870                 seq_printf(m, "Last reset                   0x%08x\n",
 871                            le32_to_cpu(*(ring->fence_drv.cpu_addr + 4)));
 872                 /* Both preemption and reset occurred */
 873                 seq_printf(m, "Last both                    0x%08x\n",
 874                            le32_to_cpu(*(ring->fence_drv.cpu_addr + 6)));
 875         }
 876         return 0;
 877 }
 878
 879 /*
 880  * amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover
 881  *
 882  * Manually trigger a gpu reset at the next fence wait.
 883  */
 884 static int gpu_recover_get(void *data, u64 *val)
 885 {
 886         struct amdgpu_device *adev = (struct amdgpu_device *)data;
 887         struct drm_device *dev = adev_to_drm(adev);
 888         int r;
 889
 890         r = pm_runtime_get_sync(dev->dev);
 891         if (r < 0) {
 892                 pm_runtime_put_autosuspend(dev->dev);
 893                 return 0;
 894         }
 895
 896         if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
 897                 flush_work(&adev->reset_work);
 898
 899         *val = atomic_read(&adev->reset_domain->reset_res);
 900
 901         pm_runtime_mark_last_busy(dev->dev);
 902         pm_runtime_put_autosuspend(dev->dev);
 903
 904         return 0;
 905 }
 906
 907 DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
 908 DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
 909                          "%lld\n");
 910
 911 static void amdgpu_debugfs_reset_work(struct work_struct *work)
 912 {
 913         struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
 914                                                   reset_work);
 915
 916         struct amdgpu_reset_context reset_context;
 917         memset(&reset_context, 0, sizeof(reset_context));
 918
 919         reset_context.method = AMD_RESET_METHOD_NONE;
 920         reset_context.reset_req_dev = adev;
 921         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 922
 923         amdgpu_device_gpu_recover(adev, NULL, &reset_context);
 924 }
 925
 926 #endif
 927
 928 void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
 929 {
 930 #if defined(CONFIG_DEBUG_FS)
 931         struct drm_minor *minor = adev_to_drm(adev)->primary;
 932         struct dentry *root = minor->debugfs_root;
 933
 934         debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
 935                             &amdgpu_debugfs_fence_info_fops);
 936
 937         if (!amdgpu_sriov_vf(adev)) {
 938
 939                 INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
 940                 debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
 941                                     &amdgpu_debugfs_gpu_recover_fops);
 942         }
 943 #endif
 944 }
 945