drivers/gpu/drm/i915/gt/gen2_engine_cs.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen2_engine_cs.h"
#include "i915_drv.h"
#include "i915_reg.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_ring.h"

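/*
 * Emit an MI_FLUSH (plus a read-cache invalidate if requested), padded
 * with dummy MI_STORE_DWORD_INDEX writes into the HWSP scratch slot.
 * Each dummy store is chased by a no-write flush; the run appears to
 * serve as a settling delay so the flush has taken effect before the
 * ring moves on.
 */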
int gen2_emit_flush(struct i915_request *rq, u32 mode)
{
        unsigned int num_store_dw = 12;
        u32 cmd, *cs;

        cmd = MI_FLUSH;
        if (mode & EMIT_INVALIDATE)
                cmd |= MI_READ_FLUSH;

        cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = cmd;
        while (num_store_dw--) {
                *cs++ = MI_STORE_DWORD_INDEX;
                *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
                *cs++ = 0;
                *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
        }
        *cs++ = cmd;

        intel_ring_advance(rq, cs);

        return 0;
}

int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
        u32 cmd, *cs;
        int i;

        /*
         * read/write caches:
         *
         * I915_GEM_DOMAIN_RENDER is always invalidated, but is
         * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
         * also flushed at 2d versus 3d pipeline switches.
         *
         * read-only caches:
         *
         * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
         * MI_READ_FLUSH is set, and is always flushed on 965.
         *
         * I915_GEM_DOMAIN_COMMAND may not exist?
         *
         * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
         * invalidated when MI_EXE_FLUSH is set.
         *
         * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
         * invalidated with every MI_FLUSH.
         *
         * TLBs:
         *
         * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
         * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
         * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
         * are flushed at any MI_FLUSH.
         */

        cmd = MI_FLUSH;
        if (mode & EMIT_INVALIDATE) {
                cmd |= MI_EXE_FLUSH;
                if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5)
                        cmd |= MI_INVALIDATE_ISP;
        }

        i = 2;
        if (mode & EMIT_INVALIDATE)
                i += 20;

        cs = intel_ring_begin(rq, i);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = cmd;

        /*
         * A random delay to let the CS invalidate take effect? Without this
         * delay, the GPU relocation path fails as the CS does not see
         * the updated contents. Just as important, if we apply the flushes
         * to the EMIT_FLUSH branch (i.e. immediately after the relocation
         * write and before the invalidate on the next batch), the relocations
         * still fail. This implies that it is a delay following invalidation
         * that is required to reset the caches as opposed to a delay to
         * ensure the memory is written.
         */
        if (mode & EMIT_INVALIDATE) {
                *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
                *cs++ = intel_gt_scratch_offset(rq->engine->gt,
                                                INTEL_GT_SCRATCH_FIELD_DEFAULT) |
                        PIPE_CONTROL_GLOBAL_GTT;
                *cs++ = 0;
                *cs++ = 0;

                for (i = 0; i < 12; i++)
                        *cs++ = MI_FLUSH;

                *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
                *cs++ = intel_gt_scratch_offset(rq->engine->gt,
                                                INTEL_GT_SCRATCH_FIELD_DEFAULT) |
                        PIPE_CONTROL_GLOBAL_GTT;
                *cs++ = 0;
                *cs++ = 0;
        }

        *cs++ = cmd;

        intel_ring_advance(rq, cs);

        return 0;
}

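/*
 * The video ring only needs a plain MI_FLUSH; the MI_NOOP keeps the
 * emission qword-aligned.
 */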
int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
        u32 *cs;

        cs = intel_ring_begin(rq, 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_FLUSH;
        *cs++ = MI_NOOP;
        intel_ring_advance(rq, cs);

        return 0;
}

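/*
 * Common breadcrumb for gen2-5: an MI_FLUSH, @flush dummy seqno writes
 * into the HWSP scratch slot (seemingly a delay to let the flush land),
 * @post writes of the seqno into its HWSP slot, and a user interrupt.
 * rq->tail is updated to point just past the breadcrumb.
 */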
static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
                                   int flush, int post)
{
        GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
        GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

        *cs++ = MI_FLUSH;

        while (flush--) {
                *cs++ = MI_STORE_DWORD_INDEX;
                *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
                *cs++ = rq->fence.seqno;
        }

        while (post--) {
                *cs++ = MI_STORE_DWORD_INDEX;
                *cs++ = I915_GEM_HWS_SEQNO_ADDR;
                *cs++ = rq->fence.seqno;
        }

        *cs++ = MI_USER_INTERRUPT;

        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);

        return cs;
}

u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
        return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}

u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
        return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
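/*
 * i830 CS TLB invalidation workaround: first evict stale PTE TLB entries
 * with a dummy colour blit into the scratch area, then, unless the batch
 * is dispatched pinned, copy it into the stable scratch bo and execute
 * the copy instead of the original.
 */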
int i830_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 len,
                       unsigned int dispatch_flags)
{
        u32 *cs, cs_offset =
                intel_gt_scratch_offset(rq->engine->gt,
                                        INTEL_GT_SCRATCH_FIELD_DEFAULT);

        GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);

        cs = intel_ring_begin(rq, 6);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /* Evict the invalid PTE TLBs */
        *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
        *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
        *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
        *cs++ = cs_offset;
        *cs++ = 0xdeadbeef;
        *cs++ = MI_NOOP;
        intel_ring_advance(rq, cs);

        if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
                if (len > I830_BATCH_LIMIT)
                        return -ENOSPC;

                cs = intel_ring_begin(rq, 6 + 2);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                /*
                 * Blit the batch (which now has all relocs applied) to the
                 * stable batch scratch bo area (so that the CS never
                 * stumbles over its TLB invalidation bug) ...
                 */
                *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
                *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
                *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
                *cs++ = cs_offset;
                *cs++ = 4096;
                *cs++ = offset;

                *cs++ = MI_FLUSH;
                *cs++ = MI_NOOP;
                intel_ring_advance(rq, cs);

                /* ... and execute it. */
                offset = cs_offset;
        }

        if (!(dispatch_flags & I915_DISPATCH_SECURE))
                offset |= MI_BATCH_NON_SECURE;

        cs = intel_ring_begin(rq, 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
        *cs++ = offset;
        intel_ring_advance(rq, cs);

        return 0;
}

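/*
 * Plain batch start: the batch is referenced by its GGTT address and the
 * non-secure bit is carried in the low bits of that address
 * (MI_BATCH_NON_SECURE).
 */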
int gen3_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 len,
                       unsigned int dispatch_flags)
{
        u32 *cs;

        if (!(dispatch_flags & I915_DISPATCH_SECURE))
                offset |= MI_BATCH_NON_SECURE;

        cs = intel_ring_begin(rq, 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
        *cs++ = offset;
        intel_ring_advance(rq, cs);

        return 0;
}

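/*
 * From gen4 onwards the non-secure flag moves out of the batch address
 * and into the MI_BATCH_BUFFER_START dword itself
 * (MI_BATCH_NON_SECURE_I965).
 */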
int gen4_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 length,
                       unsigned int dispatch_flags)
{
        u32 security;
        u32 *cs;

        security = MI_BATCH_NON_SECURE_I965;
        if (dispatch_flags & I915_DISPATCH_SECURE)
                security = 0;

        cs = intel_ring_begin(rq, 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
        *cs++ = offset;
        intel_ring_advance(rq, cs);

        return 0;
}

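/*
 * Gen2 exposes only a 16-bit IMR, hence the 16-bit uncore accessors.
 * Enabling the engine interrupt means clearing its bit in the cached
 * irq_mask.
 */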
void gen2_irq_enable(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;

        i915->irq_mask &= ~engine->irq_enable_mask;
        intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
        ENGINE_POSTING_READ16(engine, RING_IMR);
}

void gen2_irq_disable(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;

        i915->irq_mask |= engine->irq_enable_mask;
        intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
}

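/* Gen3/4 address the same interrupt mask register, but as a full 32-bit register. */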
void gen3_irq_enable(struct intel_engine_cs *engine)
{
        engine->i915->irq_mask &= ~engine->irq_enable_mask;
        intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
        intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}

void gen3_irq_disable(struct intel_engine_cs *engine)
{
        engine->i915->irq_mask |= engine->irq_enable_mask;
        intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
}

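/*
 * On gen5 (Ironlake) the engine interrupts are masked via the GT IMR,
 * so defer to the gen5 GT irq helpers.
 */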
void gen5_irq_enable(struct intel_engine_cs *engine)
{
        gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen5_irq_disable(struct intel_engine_cs *engine)
{
        gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}