// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen2_engine_cs.h"

#include "i915_drv.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_ring.h"
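/*
 * gen2_emit_flush brackets a run of dummy MI_STORE_DWORD_INDEX writes into
 * the scratch slot, each chased by an MI_FLUSH (sans write flush), between
 * two copies of the requested flush command; the repetition presumably
 * gives the flush enough time to take effect before the request continues.
 */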
int gen2_emit_flush(struct i915_request *rq, u32 mode)
{
	unsigned int num_store_dw = 12;
	u32 cmd, *cs;

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_READ_FLUSH;

	cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	while (num_store_dw--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = 0;
		*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
	}
	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}
int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;
	int i;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_EXE_FLUSH;
		if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5)
			cmd |= MI_INVALIDATE_ISP;
	}

	i = 2;
	if (mode & EMIT_INVALIDATE)
		i += 20;

	cs = intel_ring_begin(rq, i);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;

	/*
	 * A random delay to let the CS invalidate take effect? Without this
	 * delay, the GPU relocation path fails as the CS does not see
	 * the updated contents. Just as important, if we apply the flushes
	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
	 * write and before the invalidate on the next batch), the relocations
	 * still fail. This implies that there is a delay following invalidation
	 * that is required to reset the caches as opposed to a delay to
	 * ensure the memory is written.
	 */
	if (mode & EMIT_INVALIDATE) {
		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;

		for (i = 0; i < 12; i++)
			*cs++ = MI_FLUSH;

		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;
	}

	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}
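/*
 * The gen4 video ring only needs a bare MI_FLUSH; the mode flags are
 * ignored and the command is padded to an even dword count with MI_NOOP.
 */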
int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_FLUSH;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}
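/*
 * Breadcrumb emission: a flush, 'flush' dummy stores of the seqno into the
 * scratch slot (apparently another delay, in the spirit of the flushes
 * above), 'post' stores of the seqno into the status page proper, and
 * finally a user interrupt. The per-gen store counts are picked by the
 * wrappers below.
 */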
static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
				   int flush, int post)
{
	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH;

	while (flush--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = rq->fence.seqno;
	}

	while (post--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
		*cs++ = rq->fence.seqno;
	}

	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}
u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}

u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
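/*
 * i830 suffers a TLB invalidation bug in the command streamer: first evict
 * the stale PTE TLB entries with a dummy blit through the scratch area,
 * then (unless the batch is already pinned) blit the batch itself into
 * that stable scratch area and execute the copy instead of the original.
 */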
int i830_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs, cs_offset =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);

	GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Evict the invalid PTE TLBs */
	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
	*cs++ = cs_offset;
	*cs++ = 0xdeadbeef;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		cs = intel_ring_begin(rq, 6 + 2);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/*
		 * Blit the batch (which has now all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
		*cs++ = cs_offset;
		*cs++ = 4096;
		*cs++ = offset;

		*cs++ = MI_FLUSH;
		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* ... and execute it. */
		offset = cs_offset;
	}

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}
int gen3_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs;

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

int gen4_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 length,
		       unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

	security = MI_BATCH_NON_SECURE_I965;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}
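/*
 * Interrupt masking: gen2 only has a 16-bit IMR and so uses the 16-bit
 * uncore accessors, gen3/gen4 write the full 32-bit register, and gen5
 * routes engine interrupts through the GT interrupt helpers instead of
 * touching IMR here directly.
 */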
void gen2_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
	ENGINE_POSTING_READ16(engine, RING_IMR);
}

void gen2_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
}

void gen3_irq_enable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}

void gen3_irq_disable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
}

void gen5_irq_enable(struct intel_engine_cs *engine)
{
	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen5_irq_disable(struct intel_engine_cs *engine)
{
	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}
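/*
 * These helpers are hooked up per-platform by the legacy ring submission
 * backend (intel_ring_submission.c), roughly along these lines (a sketch,
 * not the verbatim upstream selection logic):
 *
 *	if (GRAPHICS_VER(i915) >= 5) {
 *		engine->irq_enable = gen5_irq_enable;
 *		engine->irq_disable = gen5_irq_disable;
 *	} else if (GRAPHICS_VER(i915) >= 3) {
 *		engine->irq_enable = gen3_irq_enable;
 *		engine->irq_disable = gen3_irq_disable;
 *	} else {
 *		engine->irq_enable = gen2_irq_enable;
 *		engine->irq_disable = gen2_irq_disable;
 *	}
 *
 * with emit_flush, emit_bb_start and emit_fini_breadcrumb selected in the
 * same fashion.
 */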