/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include "gem/i915_gem_object.h"

#include "i915_drv.h"
#include "intel_engine.h"
#include "intel_ring.h"
#include "intel_timeline.h"
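
/*
 * Recompute how many bytes are free between the software emit offset and
 * the last known hardware HEAD, and cache the result in ring->space.
 */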
unsigned int intel_ring_update_space(struct intel_ring *ring)
{
	unsigned int space;

	space = __intel_ring_space(ring->head, ring->emit, ring->size);

	ring->space = space;
	return space;
}
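
/*
 * Pin the ring into the GGTT and map it for the CPU, either through the
 * mappable aperture (WC iomap) or by mapping the backing pages directly.
 * Pinning is refcounted; only the first pin does the real work.
 */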
int intel_ring_pin(struct intel_ring *ring)
{
	struct i915_vma *vma = ring->vma;
	unsigned int flags;
	void *addr;
	int ret;

	if (atomic_fetch_inc(&ring->pin_count))
		return 0;

	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
	flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
	if (vma->obj->stolen)
		flags |= PIN_MAPPABLE;
	else
		flags |= PIN_HIGH;

	ret = i915_ggtt_pin(vma, 0, flags);
	if (unlikely(ret))
		goto err_unpin;

	if (i915_vma_is_map_and_fenceable(vma))
		addr = (void __force *)i915_vma_pin_iomap(vma);
	else
		addr = i915_gem_object_pin_map(vma->obj,
					       i915_coherent_map_type(vma->vm->i915));
	if (IS_ERR(addr)) {
		ret = PTR_ERR(addr);
		goto err_ring;
	}

	i915_vma_make_unshrinkable(vma);

	/* Discard any unused bytes beyond that submitted to hw. */
	intel_ring_reset(ring, ring->emit);

	ring->vaddr = addr;
	return 0;

err_ring:
	i915_vma_unpin(vma);
err_unpin:
	atomic_dec(&ring->pin_count);
	return ret;
}
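
/*
 * Reset all ring offsets (head, tail, emit) to the given position, wrapped
 * to the ring size, and refresh the cached free space.
 */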
void intel_ring_reset(struct intel_ring *ring, u32 tail)
{
	tail = intel_ring_wrap(ring, tail);
	ring->tail = tail;
	ring->head = tail;
	ring->emit = tail;
	intel_ring_update_space(ring);
}
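
/*
 * Drop a pin reference; on the final unpin, tear down the CPU mapping, mark
 * the buffer purgeable again and release the GGTT binding.
 */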
void intel_ring_unpin(struct intel_ring *ring)
{
	struct i915_vma *vma = ring->vma;

	if (!atomic_dec_and_test(&ring->pin_count))
		return;

	i915_vma_unset_ggtt_write(vma);
	if (i915_vma_is_map_and_fenceable(vma))
		i915_vma_unpin_iomap(vma);
	else
		i915_gem_object_unpin_map(vma->obj);

	i915_vma_make_purgeable(vma);
	i915_vma_unpin(vma);
}
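
/*
 * Allocate backing storage for a ring: prefer stolen memory when the GGTT
 * has a mappable aperture, falling back to internal pages, and wrap the
 * object in a GGTT vma.
 */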
static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
{
	struct i915_address_space *vm = &ggtt->vm;
	struct drm_i915_private *i915 = vm->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = ERR_PTR(-ENODEV);
	if (i915_ggtt_has_aperture(ggtt))
		obj = i915_gem_object_create_stolen(i915, size);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/*
	 * Mark ring buffers as read-only from GPU side (so no stray overwrites)
	 * if supported by the platform's GGTT.
	 */
	if (vm->has_read_only)
		i915_gem_object_set_readonly(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma))
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return vma;
}
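
/*
 * Create a new ring of the requested power-of-two size for an engine,
 * together with its backing vma. The ring starts out unpinned and empty.
 */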
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size)
{
	struct drm_i915_private *i915 = engine->i915;
	struct intel_ring *ring;
	struct i915_vma *vma;

	GEM_BUG_ON(!is_power_of_2(size));
	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);

	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
	if (!ring)
		return ERR_PTR(-ENOMEM);

	kref_init(&ring->ref);
	ring->size = size;
	ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(size);

	/*
	 * Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = size;
	if (IS_I830(i915) || IS_I845G(i915))
		ring->effective_size -= 2 * CACHELINE_BYTES;

	intel_ring_update_space(ring);

	vma = create_ring_vma(engine->gt->ggtt, size);
	if (IS_ERR(vma)) {
		kfree(ring);
		return ERR_CAST(vma);
	}
	ring->vma = vma;

	return ring;
}
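
/* Final kref release: drop the backing vma reference and free the ring. */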
void intel_ring_free(struct kref *ref)
{
	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);

	i915_vma_put(ring->vma);
	kfree(ring);
}
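
/*
 * Block until enough requests already emitted to this ring have been retired
 * that at least @bytes of space becomes available again.
 */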
static noinline int
wait_for_space(struct intel_ring *ring,
	       struct intel_timeline *tl,
	       unsigned int bytes)
{
	struct i915_request *target;
	long timeout;

	if (intel_ring_update_space(ring) >= bytes)
		return 0;

	GEM_BUG_ON(list_empty(&tl->requests));
	list_for_each_entry(target, &tl->requests, link) {
		if (target->ring != ring)
			continue;

		/* Would completion of this request free enough space? */
		if (bytes <= __intel_ring_space(target->postfix,
						ring->emit, ring->size))
			break;
	}

	if (GEM_WARN_ON(&target->link == &tl->requests))
		return -ENOSPC;

	timeout = i915_request_wait(target,
				    I915_WAIT_INTERRUPTIBLE,
				    MAX_SCHEDULE_TIMEOUT);
	if (timeout < 0)
		return timeout;

	i915_request_retire_upto(target);

	intel_ring_update_space(ring);
	GEM_BUG_ON(ring->space < bytes);
	return 0;
}
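
/*
 * Reserve @num_dwords of space in the ring for the caller to write commands
 * into, wrapping back to the start of the buffer and/or waiting for old
 * requests to be retired as needed. Returns a pointer to the reserved space
 * or an ERR_PTR() on failure.
 */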
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
{
	struct intel_ring *ring = rq->ring;
	const unsigned int remain_usable = ring->effective_size - ring->emit;
	const unsigned int bytes = num_dwords * sizeof(u32);
	unsigned int need_wrap = 0;
	unsigned int total_bytes;
	u32 *cs;

	/* Packets must be qword aligned. */
	GEM_BUG_ON(num_dwords & 1);

	total_bytes = bytes + rq->reserved_space;
	GEM_BUG_ON(total_bytes > ring->effective_size);

	if (unlikely(total_bytes > remain_usable)) {
		const int remain_actual = ring->size - ring->emit;

		if (bytes > remain_usable) {
			/*
			 * Not enough space for the basic request. So need to
			 * flush out the remainder and then wait for
			 * base + reserved.
			 */
			total_bytes += remain_actual;
			need_wrap = remain_actual | 1;
		} else {
			/*
			 * The base request will fit but the reserved space
			 * falls off the end. So we don't need an immediate
			 * wrap and only need to effectively wait for the
			 * reserved size from the start of the ring buffer.
			 */
			total_bytes = rq->reserved_space + remain_actual;
		}
	}

	if (unlikely(total_bytes > ring->space)) {
		int ret;

		/*
		 * Space is reserved in the ringbuffer for finalising the
		 * request, as that cannot be allowed to fail. During request
		 * finalisation, reserved_space is set to 0 to stop the
		 * overallocation and the assumption is that then we never need
		 * to wait (which has the risk of failing with EINTR).
		 *
		 * See also i915_request_alloc() and i915_request_add().
		 */
		GEM_BUG_ON(!rq->reserved_space);

		ret = wait_for_space(ring,
				     i915_request_timeline(rq),
				     total_bytes);
		if (unlikely(ret))
			return ERR_PTR(ret);
	}

	if (unlikely(need_wrap)) {
		need_wrap &= ~1;
		GEM_BUG_ON(need_wrap > ring->space);
		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
		GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));

		/* Fill the tail with MI_NOOP */
		memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
		ring->space -= need_wrap;
		ring->emit = 0;
	}

	GEM_BUG_ON(ring->emit > ring->size - bytes);
	GEM_BUG_ON(ring->space < bytes);
	cs = ring->vaddr + ring->emit;
	GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));

	ring->emit += bytes;
	ring->space -= bytes;

	return cs;
}

/* Align the ring tail to a cacheline boundary */
int intel_ring_cacheline_align(struct i915_request *rq)
{
	int num_dwords;
	void *cs;

	num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
	if (num_dwords == 0)
		return 0;

	num_dwords = CACHELINE_DWORDS - num_dwords;
	GEM_BUG_ON(num_dwords & 1);

	cs = intel_ring_begin(rq, num_dwords);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Pad out to the cacheline boundary with pairs of MI_NOOP */
	memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
	intel_ring_advance(rq, cs + num_dwords);

	GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
	return 0;
}