2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Xiang Haihao <haihao.xiang@intel.com>
32 #include "intel_batchbuffer.h"
33 #include "intel_driver.h"
35 #include "i965_gpe_utils.h"
/*
 * Emit a PIPELINE_SELECT command into `batch` with the
 * PIPELINE_SELECT_MEDIA flag set.  One batch entry is reserved for it.
 * `ctx` and `gpe_context` are not referenced by the lines visible here.
 */
38 i965_gpe_select(VADriverContextP ctx,
39 struct i965_gpe_context *gpe_context,
40 struct intel_batchbuffer *batch)
42 BEGIN_BATCH(batch, 1);
43 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
/*
 * Emit a STATE_BASE_ADDRESS command (10 batch entries) into `batch`.
 *
 * Every base address and upper bound is written as BASE_ADDRESS_MODIFY
 * alone, except the surface state base address, which refers to
 * gpe_context->surface_state_binding_table.bo.
 * NOTE(review): the opening of the call that takes that bo is not visible
 * in this excerpt; presumably it is a relocation -- confirm.
 */
48 gen6_gpe_state_base_address(VADriverContextP ctx,
49 struct i965_gpe_context *gpe_context,
50 struct intel_batchbuffer *batch)
52 BEGIN_BATCH(batch, 10);
/* Length field is encoded as the total entry count minus 2. */
54 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
55 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General State Base Address */
57 gpe_context->surface_state_binding_table.bo,
58 I915_GEM_DOMAIN_INSTRUCTION,
60 BASE_ADDRESS_MODIFY); /* Surface state base address */
61 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic State Base Address */
62 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect Object Base Address */
63 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction Base Address */
64 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
65 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
66 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect Object Access Upper Bound */
67 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */
/*
 * Emit a MEDIA_VFE_STATE command (8 batch entries) into `batch`.
 *
 * The thread count, URB entry count, GPGPU/media mode flag, URB entry size
 * and CURBE allocation size are taken from gpe_context->vfe_state and
 * packed at the bit positions shown below.  The last three entries are
 * copied verbatim from gpe_context->vfe_desc5/6/7.
 */
73 gen6_gpe_vfe_state(VADriverContextP ctx,
74 struct i965_gpe_context *gpe_context,
75 struct intel_batchbuffer *batch)
78 BEGIN_BATCH(batch, 8);
/* Length field is encoded as the total entry count minus 2. */
80 OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
81 OUT_BATCH(batch, 0); /* Scratch Space Base Pointer and Space */
83 gpe_context->vfe_state.max_num_threads << 16 | /* Maximum Number of Threads */
84 gpe_context->vfe_state.num_urb_entries << 8 | /* Number of URB Entries */
85 gpe_context->vfe_state.gpgpu_mode << 2); /* MEDIA Mode */
86 OUT_BATCH(batch, 0); /* Debug: Object ID */
88 gpe_context->vfe_state.urb_entry_size << 16 | /* URB Entry Allocation Size */
89 gpe_context->vfe_state.curbe_allocation_size); /* CURBE Allocation Size */
90 /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */
91 OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
92 OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
93 OUT_BATCH(batch, gpe_context->vfe_desc7.dword);
/*
 * Emit a MEDIA_CURBE_LOAD command (4 batch entries) into `batch`.
 *
 * The command carries gpe_context->curbe.length and a relocation to
 * gpe_context->curbe.bo (read domain I915_GEM_DOMAIN_INSTRUCTION, no write
 * domain, zero delta).
 */
100 gen6_gpe_curbe_load(VADriverContextP ctx,
101 struct i965_gpe_context *gpe_context,
102 struct intel_batchbuffer *batch)
104 BEGIN_BATCH(batch, 4);
/* Length field is encoded as the total entry count minus 2. */
106 OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
108 OUT_BATCH(batch, gpe_context->curbe.length);
109 OUT_RELOC(batch, gpe_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
111 ADVANCE_BATCH(batch);
/*
 * Emit a MEDIA_INTERFACE_LOAD command (4 batch entries) into `batch`.
 *
 * The size entry is idrt.max_entries * idrt.entry_size, followed by a
 * relocation to gpe_context->idrt.bo (read domain
 * I915_GEM_DOMAIN_INSTRUCTION, no write domain, zero delta).
 */
115 gen6_gpe_idrt(VADriverContextP ctx,
116 struct i965_gpe_context *gpe_context,
117 struct intel_batchbuffer *batch)
119 BEGIN_BATCH(batch, 4);
/* Length field is encoded as the total entry count minus 2. */
121 OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
123 OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
124 OUT_RELOC(batch, gpe_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
126 ADVANCE_BATCH(batch);
/*
 * Copy `num_kernels` kernel descriptors from `kernel_list` into
 * gpe_context->kernels, record the count in gpe_context->num_kernels, then
 * allocate a buffer object for each copied kernel and upload kernel->bin
 * (kernel->size bytes) into it at offset 0.
 *
 * NOTE(review): the limit num_kernels <= MAX_GPE_KERNELS is enforced only
 * by assert(); when assertions are compiled out (NDEBUG) the memcpy below
 * is unchecked, so callers must respect the limit themselves.
 * NOTE(review): the remaining dri_bo_alloc() arguments are not visible in
 * this excerpt, and no check of its result is visible either -- confirm.
 */
130 i965_gpe_load_kernels(VADriverContextP ctx,
131 struct i965_gpe_context *gpe_context,
132 struct i965_kernel *kernel_list,
133 unsigned int num_kernels)
135 struct i965_driver_data *i965 = i965_driver_data(ctx);
138 assert(num_kernels <= MAX_GPE_KERNELS);
139 memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
140 gpe_context->num_kernels = num_kernels;
/* Work on the context's own copies, not on the caller's kernel_list. */
142 for (i = 0; i < num_kernels; i++) {
143 struct i965_kernel *kernel = &gpe_context->kernels[i];
145 kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
150 dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
/*
 * Release the buffer objects held by `gpe_context`: the surface-state /
 * binding-table bo, the interface descriptor table bo and the CURBE bo
 * (each pointer is reset to NULL afterwards), followed by the bo of every
 * kernel in gpe_context->kernels[0 .. num_kernels-1].
 */
155 i965_gpe_context_destroy(struct i965_gpe_context *gpe_context)
159 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
160 gpe_context->surface_state_binding_table.bo = NULL;
162 dri_bo_unreference(gpe_context->idrt.bo);
163 gpe_context->idrt.bo = NULL;
165 dri_bo_unreference(gpe_context->curbe.bo);
166 gpe_context->curbe.bo = NULL;
168 for (i = 0; i < gpe_context->num_kernels; i++) {
169 struct i965_kernel *kernel = &gpe_context->kernels[i];
171 dri_bo_unreference(kernel->bo);
/*
 * (Re)allocate the three per-context buffer objects.
 *
 * For each of the surface-state/binding-table, interface descriptor table
 * and CURBE buffers, the reference currently stored in `gpe_context` is
 * dropped, a new bo is allocated from i965->intel.bufmgr, and the new bo is
 * stored back into the context.  Sizes come from fields the caller must
 * have filled in beforehand: surface_state_binding_table.length,
 * idrt.entry_size * idrt.max_entries, and curbe.length.
 *
 * NOTE(review): no check of the dri_bo_alloc() result is visible in this
 * excerpt -- confirm against the full source.
 */
177 i965_gpe_context_init(VADriverContextP ctx,
178 struct i965_gpe_context *gpe_context)
180 struct i965_driver_data *i965 = i965_driver_data(ctx);
/* Surface state & binding table buffer. */
183 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
184 bo = dri_bo_alloc(i965->intel.bufmgr,
185 "surface state & binding table",
186 gpe_context->surface_state_binding_table.length,
189 gpe_context->surface_state_binding_table.bo = bo;
/* Interface descriptor table buffer. */
191 dri_bo_unreference(gpe_context->idrt.bo);
192 bo = dri_bo_alloc(i965->intel.bufmgr,
193 "interface descriptor table",
194 gpe_context->idrt.entry_size * gpe_context->idrt.max_entries,
197 gpe_context->idrt.bo = bo;
/* CURBE buffer. */
199 dri_bo_unreference(gpe_context->curbe.bo);
200 bo = dri_bo_alloc(i965->intel.bufmgr,
202 gpe_context->curbe.length,
205 gpe_context->curbe.bo = bo;
/*
 * Emit the GPE pipeline setup sequence into `batch`: an MI flush, then
 * pipeline select, state base address, VFE state, CURBE load and interface
 * descriptor load, in that order.
 */
209 gen6_gpe_pipeline_setup(VADriverContextP ctx,
210 struct i965_gpe_context *gpe_context,
211 struct intel_batchbuffer *batch)
213 intel_batchbuffer_emit_mi_flush(batch);
215 i965_gpe_select(ctx, gpe_context, batch);
216 gen6_gpe_state_base_address(ctx, gpe_context, batch);
217 gen6_gpe_vfe_state(ctx, gpe_context, batch);
218 gen6_gpe_curbe_load(ctx, gpe_context, batch);
219 gen6_gpe_idrt(ctx, gpe_context, batch);
/*
 * Program the ss3.tiled_surface / ss3.tile_walk fields of `ss` from
 * `tiling`.  I915_TILING_NONE clears both fields; the two other visible
 * arms set tiled_surface and select X-major or Y-major walk.
 * NOTE(review): the case labels of those two arms are not visible in this
 * excerpt (presumably I915_TILING_X and I915_TILING_Y) -- confirm.
 */
223 i965_gpe_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
226 case I915_TILING_NONE:
227 ss->ss3.tiled_surface = 0;
228 ss->ss3.tile_walk = 0;
231 ss->ss3.tiled_surface = 1;
232 ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
235 ss->ss3.tiled_surface = 1;
236 ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
/*
 * Program the ss2.tiled_surface / ss2.tile_walk fields of `ss` from
 * `tiling`.  I915_TILING_NONE clears both fields; the two other visible
 * arms set tiled_surface and select X-major or Y-major walk.
 * NOTE(review): the case labels of those two arms are not visible in this
 * excerpt (presumably I915_TILING_X and I915_TILING_Y) -- confirm.
 */
242 i965_gpe_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
245 case I915_TILING_NONE:
246 ss->ss2.tiled_surface = 0;
247 ss->ss2.tile_walk = 0;
250 ss->ss2.tiled_surface = 1;
251 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
254 ss->ss2.tiled_surface = 1;
255 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
/*
 * Program the ss0.tiled_surface / ss0.tile_walk fields of the gen7 surface
 * state `ss` from `tiling`.  I915_TILING_NONE clears both fields; the two
 * other visible arms set tiled_surface and select X-major or Y-major walk.
 * NOTE(review): the case labels of those two arms are not visible in this
 * excerpt (presumably I915_TILING_X and I915_TILING_Y) -- confirm.
 */
261 gen7_gpe_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
264 case I915_TILING_NONE:
265 ss->ss0.tiled_surface = 0;
266 ss->ss0.tile_walk = 0;
269 ss->ss0.tiled_surface = 1;
270 ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
273 ss->ss0.tiled_surface = 1;
274 ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
/*
 * Program the ss2.tiled_surface / ss2.tile_walk fields of the gen7 surface
 * state `ss` from `tiling`.  I915_TILING_NONE clears both fields; the two
 * other visible arms set tiled_surface and select X-major or Y-major walk.
 * NOTE(review): the case labels of those two arms are not visible in this
 * excerpt (presumably I915_TILING_X and I915_TILING_Y) -- confirm.
 */
280 gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
283 case I915_TILING_NONE:
284 ss->ss2.tiled_surface = 0;
285 ss->ss2.tile_walk = 0;
288 ss->ss2.tiled_surface = 1;
289 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
292 ss->ss2.tiled_surface = 1;
293 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
/*
 * Fill `ss` with a surface state describing `obj_surface`.
 *
 * The surface must have a bo and be NV12 (both enforced by assert()).
 * `ss` is zeroed first; width/height come from orig_width/orig_height and
 * the pitch from obj_surface->width, each programmed as "value - 1".  The
 * format is MFX_SURFACE_PLANAR_420_8 with interleaved chroma, tiling is
 * taken from the bo, and the Cb offsets come from x_cb_offset/y_cb_offset.
 */
299 i965_gpe_set_surface2_state(VADriverContextP ctx,
300 struct object_surface *obj_surface,
301 struct i965_surface_state2 *ss)
304 unsigned int tiling, swizzle;
306 assert(obj_surface->bo);
307 assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
/* Only `tiling` is used below; `swizzle` is fetched but not read here. */
309 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
310 w = obj_surface->orig_width;
311 h = obj_surface->orig_height;
312 w_pitch = obj_surface->width;
314 memset(ss, 0, sizeof(*ss));
316 ss->ss0.surface_base_address = obj_surface->bo->offset;
318 ss->ss1.cbcr_pixel_offset_v_direction = 2;
319 ss->ss1.width = w - 1;
320 ss->ss1.height = h - 1;
322 ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
323 ss->ss2.interleave_chroma = 1;
324 ss->ss2.pitch = w_pitch - 1;
325 ss->ss2.half_pitch_for_chroma = 0;
326 i965_gpe_set_surface2_tiling(ss, tiling);
327 /* ss3: UV offset for interleave mode */
328 ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
329 ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
/*
 * Write a surface state for `obj_surface` into the context's
 * surface-state/binding-table bo and bind it.
 *
 * The state is built at byte offset `surface_state_offset` of the bo's CPU
 * mapping (bo->virtual), a relocation is emitted for its ss0 member, and
 * `surface_state_offset` is stored in the binding-table entry located at
 * byte offset `binding_table_offset` of the same bo.
 * NOTE(review): the map/unmap calls and the remaining relocation arguments
 * are not visible in this excerpt -- confirm against the full source.
 */
333 i965_gpe_surface2_setup(VADriverContextP ctx,
334 struct i965_gpe_context *gpe_context,
335 struct object_surface *obj_surface,
336 unsigned long binding_table_offset,
337 unsigned long surface_state_offset)
339 struct i965_surface_state2 *ss;
342 bo = gpe_context->surface_state_binding_table.bo;
346 ss = (struct i965_surface_state2 *)((char *)bo->virtual + surface_state_offset);
347 i965_gpe_set_surface2_state(ctx, obj_surface, ss);
348 dri_bo_emit_reloc(bo,
349 I915_GEM_DOMAIN_RENDER, 0,
351 surface_state_offset + offsetof(struct i965_surface_state2, ss0),
/* Binding-table entry: stores the offset of the surface state within the bo. */
354 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/*
 * Fill `ss` with a 2D R8_UNORM surface state describing `obj_surface` for
 * media read/write messages.
 *
 * `ss` is zeroed first.  The width is programmed as orig_width / 4 - 1 (in
 * DWORDs), the height as orig_height - 1 and the pitch as
 * obj_surface->width - 1; tiling is taken from the bo.
 * NOTE(review): obj_surface->bo is dereferenced without a visible check.
 */
359 i965_gpe_set_media_rw_surface_state(VADriverContextP ctx,
360 struct object_surface *obj_surface,
361 struct i965_surface_state *ss)
364 unsigned int tiling, swizzle;
/* Only `tiling` is used below; `swizzle` is fetched but not read here. */
366 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
367 w = obj_surface->orig_width;
368 h = obj_surface->orig_height;
369 w_pitch = obj_surface->width;
371 memset(ss, 0, sizeof(*ss));
373 ss->ss0.surface_type = I965_SURFACE_2D;
374 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
376 ss->ss1.base_addr = obj_surface->bo->offset;
378 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
379 ss->ss2.height = h - 1;
381 ss->ss3.pitch = w_pitch - 1;
382 i965_gpe_set_surface_tiling(ss, tiling);
/*
 * Write a media read/write surface state for `obj_surface` into the
 * context's surface-state/binding-table bo and bind it.
 *
 * The bo is mapped, the state is built at byte offset
 * `surface_state_offset` of the mapping, a relocation is emitted for its
 * ss1 member (which holds the base address), and `surface_state_offset` is
 * stored in the binding-table entry at byte offset `binding_table_offset`.
 * NOTE(review): the unmap call and the remaining relocation arguments are
 * not visible in this excerpt -- confirm against the full source.
 */
386 i965_gpe_media_rw_surface_setup(VADriverContextP ctx,
387 struct i965_gpe_context *gpe_context,
388 struct object_surface *obj_surface,
389 unsigned long binding_table_offset,
390 unsigned long surface_state_offset)
392 struct i965_surface_state *ss;
395 bo = gpe_context->surface_state_binding_table.bo;
396 dri_bo_map(bo, True);
399 ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
400 i965_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
401 dri_bo_emit_reloc(bo,
402 I915_GEM_DOMAIN_RENDER, 0,
404 surface_state_offset + offsetof(struct i965_surface_state, ss1),
/* Binding-table entry: stores the offset of the surface state within the bo. */
407 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/*
 * Fill `ss` with a buffer-type surface state describing `buffer_surface`.
 *
 * The entry count is num_blocks * size_block / pitch.  (entry count - 1)
 * is split across three fields: bits 0-6 go to ss2.width, bits 7-19 to
 * ss2.height and bits 20-26 to ss3.depth.  The pitch is programmed as
 * buffer_surface->pitch - 1.
 * NOTE(review): assumes buffer_surface->pitch is non-zero (it is used as a
 * divisor) -- confirm at the call sites.
 */
412 i965_gpe_set_buffer_surface_state(VADriverContextP ctx,
413 struct i965_buffer_surface *buffer_surface,
414 struct i965_surface_state *ss)
418 assert(buffer_surface->bo);
419 num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;
421 memset(ss, 0, sizeof(*ss));
423 ss->ss0.render_cache_read_mode = 1;
424 ss->ss0.surface_type = I965_SURFACE_BUFFER;
426 ss->ss1.base_addr = buffer_surface->bo->offset;
428 ss->ss2.width = ((num_entries - 1) & 0x7f);
429 ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
431 ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
432 ss->ss3.pitch = buffer_surface->pitch - 1;
/*
 * Write a buffer surface state for `buffer_surface` into the context's
 * surface-state/binding-table bo and bind it.
 *
 * The state is built at byte offset `surface_state_offset` of the bo's CPU
 * mapping, a relocation with I915_GEM_DOMAIN_RENDER as both read and write
 * domain is emitted for its ss1 member, and `surface_state_offset` is
 * stored in the binding-table entry at byte offset `binding_table_offset`.
 * NOTE(review): the map/unmap calls and the remaining relocation arguments
 * are not visible in this excerpt -- confirm against the full source.
 */
436 i965_gpe_buffer_suface_setup(VADriverContextP ctx,
437 struct i965_gpe_context *gpe_context,
438 struct i965_buffer_surface *buffer_surface,
439 unsigned long binding_table_offset,
440 unsigned long surface_state_offset)
442 struct i965_surface_state *ss;
445 bo = gpe_context->surface_state_binding_table.bo;
449 ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
450 i965_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
451 dri_bo_emit_reloc(bo,
452 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
454 surface_state_offset + offsetof(struct i965_surface_state, ss1),
/* Binding-table entry: stores the offset of the surface state within the bo. */
457 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/*
 * Fill the gen7 surface state `ss` with a description of `obj_surface`.
 *
 * The surface must have a bo and be NV12 (both enforced by assert()).
 * `ss` is zeroed first; width/height come from orig_width/orig_height and
 * the pitch from obj_surface->width, each programmed as "value - 1".  The
 * format is MFX_SURFACE_PLANAR_420_8 with interleaved chroma, tiling is
 * taken from the bo, and the Cb offsets come from x_cb_offset/y_cb_offset.
 */
462 gen7_gpe_set_surface2_state(VADriverContextP ctx,
463 struct object_surface *obj_surface,
464 struct gen7_surface_state2 *ss)
467 unsigned int tiling, swizzle;
469 assert(obj_surface->bo);
470 assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
/* Only `tiling` is used below; `swizzle` is fetched but not read here. */
472 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
473 w = obj_surface->orig_width;
474 h = obj_surface->orig_height;
475 w_pitch = obj_surface->width;
477 memset(ss, 0, sizeof(*ss));
479 ss->ss0.surface_base_address = obj_surface->bo->offset;
481 ss->ss1.cbcr_pixel_offset_v_direction = 2;
482 ss->ss1.width = w - 1;
483 ss->ss1.height = h - 1;
485 ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
486 ss->ss2.interleave_chroma = 1;
487 ss->ss2.pitch = w_pitch - 1;
488 ss->ss2.half_pitch_for_chroma = 0;
489 gen7_gpe_set_surface2_tiling(ss, tiling);
490 /* ss3: UV offset for interleave mode */
491 ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
492 ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
/*
 * Write a gen7 surface state for `obj_surface` into the context's
 * surface-state/binding-table bo and bind it.
 *
 * The state is built at byte offset `surface_state_offset` of the bo's CPU
 * mapping (bo->virtual), a relocation is emitted for its ss0 member, and
 * `surface_state_offset` is stored in the binding-table entry located at
 * byte offset `binding_table_offset` of the same bo.
 * NOTE(review): the map/unmap calls and the remaining relocation arguments
 * are not visible in this excerpt -- confirm against the full source.
 */
496 gen7_gpe_surface2_setup(VADriverContextP ctx,
497 struct i965_gpe_context *gpe_context,
498 struct object_surface *obj_surface,
499 unsigned long binding_table_offset,
500 unsigned long surface_state_offset)
502 struct gen7_surface_state2 *ss;
505 bo = gpe_context->surface_state_binding_table.bo;
509 ss = (struct gen7_surface_state2 *)((char *)bo->virtual + surface_state_offset);
510 gen7_gpe_set_surface2_state(ctx, obj_surface, ss);
511 dri_bo_emit_reloc(bo,
512 I915_GEM_DOMAIN_RENDER, 0,
514 surface_state_offset + offsetof(struct gen7_surface_state2, ss0),
/* Binding-table entry: stores the offset of the surface state within the bo. */
517 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/*
 * Fill the gen7 surface state `ss` with a 2D R8_UNORM description of
 * `obj_surface` for media read/write messages.
 *
 * `ss` is zeroed first.  The width is programmed as orig_width / 4 - 1 (in
 * DWORDs), the height as orig_height - 1 and the pitch as
 * obj_surface->width - 1; tiling is taken from the bo.
 * NOTE(review): obj_surface->bo is dereferenced without a visible check.
 */
522 gen7_gpe_set_media_rw_surface_state(VADriverContextP ctx,
523 struct object_surface *obj_surface,
524 struct gen7_surface_state *ss)
527 unsigned int tiling, swizzle;
/* Only `tiling` is used below; `swizzle` is fetched but not read here. */
529 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
530 w = obj_surface->orig_width;
531 h = obj_surface->orig_height;
532 w_pitch = obj_surface->width;
534 memset(ss, 0, sizeof(*ss));
536 ss->ss0.surface_type = I965_SURFACE_2D;
537 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
539 ss->ss1.base_addr = obj_surface->bo->offset;
541 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
542 ss->ss2.height = h - 1;
544 ss->ss3.pitch = w_pitch - 1;
545 gen7_gpe_set_surface_tiling(ss, tiling);
/*
 * Fill the gen7 surface state `ss` with a 2D R8_UNORM description of the
 * chroma part of `obj_surface`.
 *
 * The base address is the bo offset plus obj_surface->height *
 * obj_surface->width.  The width is programmed as orig_width / 4 - 1 (in
 * DWORDs), the height as obj_surface->height / 2 - 1 and the pitch as
 * obj_surface->width - 1; tiling is taken from the bo.
 */
549 gen75_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
550 struct object_surface *obj_surface,
551 struct gen7_surface_state *ss)
554 unsigned int tiling, swizzle;
/* Only `tiling` is used below; `swizzle` is fetched but not read here. */
557 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
558 w = obj_surface->orig_width;
559 w_pitch = obj_surface->width;
/* Byte offset added to the bo address to reach the chroma data. */
561 cbcr_offset = obj_surface->height * obj_surface->width;
562 memset(ss, 0, sizeof(*ss));
564 ss->ss0.surface_type = I965_SURFACE_2D;
565 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
567 ss->ss1.base_addr = obj_surface->bo->offset + cbcr_offset;
569 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
570 ss->ss2.height = (obj_surface->height / 2) -1;
572 ss->ss3.pitch = w_pitch - 1;
573 gen7_gpe_set_surface_tiling(ss, tiling);
/*
 * Write a gen7 media read/write surface state for `obj_surface` into the
 * context's surface-state/binding-table bo and bind it.
 *
 * The bo is mapped, the state is built at byte offset
 * `surface_state_offset` of the mapping, a relocation is emitted for its
 * ss1 member (which holds the base address), and `surface_state_offset` is
 * stored in the binding-table entry at byte offset `binding_table_offset`.
 * NOTE(review): the unmap call and the remaining relocation arguments are
 * not visible in this excerpt -- confirm against the full source.
 */
577 gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
578 struct i965_gpe_context *gpe_context,
579 struct object_surface *obj_surface,
580 unsigned long binding_table_offset,
581 unsigned long surface_state_offset)
583 struct gen7_surface_state *ss;
586 bo = gpe_context->surface_state_binding_table.bo;
587 dri_bo_map(bo, True);
590 ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
591 gen7_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
592 dri_bo_emit_reloc(bo,
593 I915_GEM_DOMAIN_RENDER, 0,
595 surface_state_offset + offsetof(struct gen7_surface_state, ss1),
/* Binding-table entry: stores the offset of the surface state within the bo. */
598 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/*
 * Write a chroma surface state for `obj_surface` into the context's
 * surface-state/binding-table bo and bind it.
 *
 * The surface must be NV12 (enforced by assert()).  The bo is mapped, the
 * state is built at byte offset `surface_state_offset` of the mapping, a
 * relocation is emitted for its ss1 member, and `surface_state_offset` is
 * stored in the binding-table entry at byte offset `binding_table_offset`.
 * NOTE(review): the unmap call and the remaining relocation arguments are
 * not visible in this excerpt -- confirm against the full source.
 */
603 gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
604 struct i965_gpe_context *gpe_context,
605 struct object_surface *obj_surface,
606 unsigned long binding_table_offset,
607 unsigned long surface_state_offset)
609 struct gen7_surface_state *ss;
613 assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
614 bo = gpe_context->surface_state_binding_table.bo;
615 dri_bo_map(bo, True);
/*
 * Same height * width offset that gen75_gpe_set_media_chroma_surface_state()
 * adds to the base address.  NOTE(review): its use is not visible here;
 * presumably it is the relocation delta -- confirm.
 */
618 cbcr_offset = obj_surface->height * obj_surface->width;
619 ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
620 gen75_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
621 dri_bo_emit_reloc(bo,
622 I915_GEM_DOMAIN_RENDER, 0,
624 surface_state_offset + offsetof(struct gen7_surface_state, ss1),
/* Binding-table entry: stores the offset of the surface state within the bo. */
627 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/*
 * Fill the gen7 surface state `ss` with a buffer-type description of
 * `buffer_surface`.
 *
 * The entry count is num_blocks * size_block / pitch.  (entry count - 1)
 * is split across three fields: bits 0-6 go to ss2.width, bits 7-20 to
 * ss2.height and bits 21-26 to ss3.depth.  The pitch is programmed as
 * buffer_surface->pitch - 1.
 * NOTE(review): assumes buffer_surface->pitch is non-zero (it is used as a
 * divisor) -- confirm at the call sites.
 */
633 gen7_gpe_set_buffer_surface_state(VADriverContextP ctx,
634 struct i965_buffer_surface *buffer_surface,
635 struct gen7_surface_state *ss)
639 assert(buffer_surface->bo);
640 num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;
642 memset(ss, 0, sizeof(*ss));
644 ss->ss0.surface_type = I965_SURFACE_BUFFER;
646 ss->ss1.base_addr = buffer_surface->bo->offset;
648 ss->ss2.width = ((num_entries - 1) & 0x7f);
649 ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
651 ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
652 ss->ss3.pitch = buffer_surface->pitch - 1;
/*
 * Write a gen7 buffer surface state for `buffer_surface` into the
 * context's surface-state/binding-table bo and bind it.
 *
 * The state is built at byte offset `surface_state_offset` of the bo's CPU
 * mapping, a relocation with I915_GEM_DOMAIN_RENDER as both read and write
 * domain is emitted for its ss1 member, and `surface_state_offset` is
 * stored in the binding-table entry at byte offset `binding_table_offset`.
 * NOTE(review): the map/unmap calls and the remaining relocation arguments
 * are not visible in this excerpt -- confirm against the full source.
 */
656 gen7_gpe_buffer_suface_setup(VADriverContextP ctx,
657 struct i965_gpe_context *gpe_context,
658 struct i965_buffer_surface *buffer_surface,
659 unsigned long binding_table_offset,
660 unsigned long surface_state_offset)
662 struct gen7_surface_state *ss;
665 bo = gpe_context->surface_state_binding_table.bo;
669 ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
670 gen7_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
671 dri_bo_emit_reloc(bo,
672 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
674 surface_state_offset + offsetof(struct gen7_surface_state, ss1),
/* Binding-table entry: stores the offset of the surface state within the bo. */
677 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;