/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Xiang Haihao <haihao.xiang@intel.com>
 */
32 #include "intel_batchbuffer.h"
33 #include "intel_driver.h"
35 #include "i965_gpe_utils.h"
38 i965_gpe_select(VADriverContextP ctx,
39 struct i965_gpe_context *gpe_context,
40 struct intel_batchbuffer *batch)
42 BEGIN_BATCH(batch, 1);
43 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
48 gen6_gpe_state_base_address(VADriverContextP ctx,
49 struct i965_gpe_context *gpe_context,
50 struct intel_batchbuffer *batch)
52 BEGIN_BATCH(batch, 10);
54 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
55 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General State Base Address */
57 gpe_context->surface_state_binding_table.bo,
58 I915_GEM_DOMAIN_INSTRUCTION,
60 BASE_ADDRESS_MODIFY); /* Surface state base address */
61 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic State Base Address */
62 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect Object Base Address */
63 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction Base Address */
64 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
65 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
66 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect Object Access Upper Bound */
67 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */
73 gen6_gpe_vfe_state(VADriverContextP ctx,
74 struct i965_gpe_context *gpe_context,
75 struct intel_batchbuffer *batch)
78 BEGIN_BATCH(batch, 8);
80 OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
81 OUT_BATCH(batch, 0); /* Scratch Space Base Pointer and Space */
83 gpe_context->vfe_state.max_num_threads << 16 | /* Maximum Number of Threads */
84 gpe_context->vfe_state.num_urb_entries << 8 | /* Number of URB Entries */
85 gpe_context->vfe_state.gpgpu_mode << 2); /* MEDIA Mode */
86 OUT_BATCH(batch, 0); /* Debug: Object ID */
88 gpe_context->vfe_state.urb_entry_size << 16 | /* URB Entry Allocation Size */
89 gpe_context->vfe_state.curbe_allocation_size); /* CURBE Allocation Size */
90 /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */
91 OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
92 OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
93 OUT_BATCH(batch, gpe_context->vfe_desc7.dword);
100 gen6_gpe_curbe_load(VADriverContextP ctx,
101 struct i965_gpe_context *gpe_context,
102 struct intel_batchbuffer *batch)
104 BEGIN_BATCH(batch, 4);
106 OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
108 OUT_BATCH(batch, gpe_context->curbe.length);
109 OUT_RELOC(batch, gpe_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
111 ADVANCE_BATCH(batch);
115 gen6_gpe_idrt(VADriverContextP ctx,
116 struct i965_gpe_context *gpe_context,
117 struct intel_batchbuffer *batch)
119 BEGIN_BATCH(batch, 4);
121 OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
123 OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
124 OUT_RELOC(batch, gpe_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
126 ADVANCE_BATCH(batch);
130 i965_gpe_load_kernels(VADriverContextP ctx,
131 struct i965_gpe_context *gpe_context,
132 struct i965_kernel *kernel_list,
133 unsigned int num_kernels)
135 struct i965_driver_data *i965 = i965_driver_data(ctx);
138 assert(num_kernels <= MAX_GPE_KERNELS);
139 memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
140 gpe_context->num_kernels = num_kernels;
142 for (i = 0; i < num_kernels; i++) {
143 struct i965_kernel *kernel = &gpe_context->kernels[i];
145 kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
150 dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
155 i965_gpe_context_destroy(struct i965_gpe_context *gpe_context)
159 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
160 gpe_context->surface_state_binding_table.bo = NULL;
162 dri_bo_unreference(gpe_context->idrt.bo);
163 gpe_context->idrt.bo = NULL;
165 dri_bo_unreference(gpe_context->curbe.bo);
166 gpe_context->curbe.bo = NULL;
168 for (i = 0; i < gpe_context->num_kernels; i++) {
169 struct i965_kernel *kernel = &gpe_context->kernels[i];
171 dri_bo_unreference(kernel->bo);
177 i965_gpe_context_init(VADriverContextP ctx,
178 struct i965_gpe_context *gpe_context)
180 struct i965_driver_data *i965 = i965_driver_data(ctx);
183 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
184 bo = dri_bo_alloc(i965->intel.bufmgr,
185 "surface state & binding table",
186 gpe_context->surface_state_binding_table.length,
189 gpe_context->surface_state_binding_table.bo = bo;
191 dri_bo_unreference(gpe_context->idrt.bo);
192 bo = dri_bo_alloc(i965->intel.bufmgr,
193 "interface descriptor table",
194 gpe_context->idrt.entry_size * gpe_context->idrt.max_entries,
197 gpe_context->idrt.bo = bo;
199 dri_bo_unreference(gpe_context->curbe.bo);
200 bo = dri_bo_alloc(i965->intel.bufmgr,
202 gpe_context->curbe.length,
205 gpe_context->curbe.bo = bo;
209 gen6_gpe_pipeline_setup(VADriverContextP ctx,
210 struct i965_gpe_context *gpe_context,
211 struct intel_batchbuffer *batch)
213 intel_batchbuffer_emit_mi_flush(batch);
215 i965_gpe_select(ctx, gpe_context, batch);
216 gen6_gpe_state_base_address(ctx, gpe_context, batch);
217 gen6_gpe_vfe_state(ctx, gpe_context, batch);
218 gen6_gpe_curbe_load(ctx, gpe_context, batch);
219 gen6_gpe_idrt(ctx, gpe_context, batch);
223 i965_gpe_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
226 case I915_TILING_NONE:
227 ss->ss3.tiled_surface = 0;
228 ss->ss3.tile_walk = 0;
231 ss->ss3.tiled_surface = 1;
232 ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
235 ss->ss3.tiled_surface = 1;
236 ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
242 i965_gpe_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
245 case I915_TILING_NONE:
246 ss->ss2.tiled_surface = 0;
247 ss->ss2.tile_walk = 0;
250 ss->ss2.tiled_surface = 1;
251 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
254 ss->ss2.tiled_surface = 1;
255 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
261 gen7_gpe_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
264 case I915_TILING_NONE:
265 ss->ss0.tiled_surface = 0;
266 ss->ss0.tile_walk = 0;
269 ss->ss0.tiled_surface = 1;
270 ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
273 ss->ss0.tiled_surface = 1;
274 ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
280 gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
283 case I915_TILING_NONE:
284 ss->ss2.tiled_surface = 0;
285 ss->ss2.tile_walk = 0;
288 ss->ss2.tiled_surface = 1;
289 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
292 ss->ss2.tiled_surface = 1;
293 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
299 gen8_gpe_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
302 case I915_TILING_NONE:
303 ss->ss0.tiled_surface = 0;
304 ss->ss0.tile_walk = 0;
307 ss->ss0.tiled_surface = 1;
308 ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
311 ss->ss0.tiled_surface = 1;
312 ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
318 gen8_gpe_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
321 case I915_TILING_NONE:
322 ss->ss2.tiled_surface = 0;
323 ss->ss2.tile_walk = 0;
326 ss->ss2.tiled_surface = 1;
327 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
330 ss->ss2.tiled_surface = 1;
331 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
337 i965_gpe_set_surface2_state(VADriverContextP ctx,
338 struct object_surface *obj_surface,
339 struct i965_surface_state2 *ss)
342 unsigned int tiling, swizzle;
344 assert(obj_surface->bo);
345 assert(obj_surface->fourcc == VA_FOURCC_NV12);
347 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
348 w = obj_surface->orig_width;
349 h = obj_surface->orig_height;
350 w_pitch = obj_surface->width;
352 memset(ss, 0, sizeof(*ss));
354 ss->ss0.surface_base_address = obj_surface->bo->offset;
356 ss->ss1.cbcr_pixel_offset_v_direction = 2;
357 ss->ss1.width = w - 1;
358 ss->ss1.height = h - 1;
360 ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
361 ss->ss2.interleave_chroma = 1;
362 ss->ss2.pitch = w_pitch - 1;
363 ss->ss2.half_pitch_for_chroma = 0;
364 i965_gpe_set_surface2_tiling(ss, tiling);
365 /* ss3: UV offset for interleave mode */
366 ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
367 ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
371 i965_gpe_surface2_setup(VADriverContextP ctx,
372 struct i965_gpe_context *gpe_context,
373 struct object_surface *obj_surface,
374 unsigned long binding_table_offset,
375 unsigned long surface_state_offset)
377 struct i965_surface_state2 *ss;
380 bo = gpe_context->surface_state_binding_table.bo;
384 ss = (struct i965_surface_state2 *)((char *)bo->virtual + surface_state_offset);
385 i965_gpe_set_surface2_state(ctx, obj_surface, ss);
386 dri_bo_emit_reloc(bo,
387 I915_GEM_DOMAIN_RENDER, 0,
389 surface_state_offset + offsetof(struct i965_surface_state2, ss0),
392 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
397 i965_gpe_set_media_rw_surface_state(VADriverContextP ctx,
398 struct object_surface *obj_surface,
399 struct i965_surface_state *ss)
402 unsigned int tiling, swizzle;
404 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
405 w = obj_surface->orig_width;
406 h = obj_surface->orig_height;
407 w_pitch = obj_surface->width;
409 memset(ss, 0, sizeof(*ss));
411 ss->ss0.surface_type = I965_SURFACE_2D;
412 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
414 ss->ss1.base_addr = obj_surface->bo->offset;
416 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
417 ss->ss2.height = h - 1;
419 ss->ss3.pitch = w_pitch - 1;
420 i965_gpe_set_surface_tiling(ss, tiling);
424 i965_gpe_media_rw_surface_setup(VADriverContextP ctx,
425 struct i965_gpe_context *gpe_context,
426 struct object_surface *obj_surface,
427 unsigned long binding_table_offset,
428 unsigned long surface_state_offset)
430 struct i965_surface_state *ss;
433 bo = gpe_context->surface_state_binding_table.bo;
434 dri_bo_map(bo, True);
437 ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
438 i965_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
439 dri_bo_emit_reloc(bo,
440 I915_GEM_DOMAIN_RENDER, 0,
442 surface_state_offset + offsetof(struct i965_surface_state, ss1),
445 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
450 i965_gpe_set_buffer_surface_state(VADriverContextP ctx,
451 struct i965_buffer_surface *buffer_surface,
452 struct i965_surface_state *ss)
456 assert(buffer_surface->bo);
457 num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;
459 memset(ss, 0, sizeof(*ss));
461 ss->ss0.render_cache_read_mode = 1;
462 ss->ss0.surface_type = I965_SURFACE_BUFFER;
464 ss->ss1.base_addr = buffer_surface->bo->offset;
466 ss->ss2.width = ((num_entries - 1) & 0x7f);
467 ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
469 ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
470 ss->ss3.pitch = buffer_surface->pitch - 1;
474 i965_gpe_buffer_suface_setup(VADriverContextP ctx,
475 struct i965_gpe_context *gpe_context,
476 struct i965_buffer_surface *buffer_surface,
477 unsigned long binding_table_offset,
478 unsigned long surface_state_offset)
480 struct i965_surface_state *ss;
483 bo = gpe_context->surface_state_binding_table.bo;
487 ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
488 i965_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
489 dri_bo_emit_reloc(bo,
490 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
492 surface_state_offset + offsetof(struct i965_surface_state, ss1),
495 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
500 gen7_gpe_set_surface2_state(VADriverContextP ctx,
501 struct object_surface *obj_surface,
502 struct gen7_surface_state2 *ss)
505 unsigned int tiling, swizzle;
507 assert(obj_surface->bo);
508 assert(obj_surface->fourcc == VA_FOURCC_NV12);
510 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
511 w = obj_surface->orig_width;
512 h = obj_surface->orig_height;
513 w_pitch = obj_surface->width;
515 memset(ss, 0, sizeof(*ss));
517 ss->ss0.surface_base_address = obj_surface->bo->offset;
519 ss->ss1.cbcr_pixel_offset_v_direction = 2;
520 ss->ss1.width = w - 1;
521 ss->ss1.height = h - 1;
523 ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
524 ss->ss2.interleave_chroma = 1;
525 ss->ss2.pitch = w_pitch - 1;
526 ss->ss2.half_pitch_for_chroma = 0;
527 gen7_gpe_set_surface2_tiling(ss, tiling);
528 /* ss3: UV offset for interleave mode */
529 ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
530 ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
534 gen7_gpe_surface2_setup(VADriverContextP ctx,
535 struct i965_gpe_context *gpe_context,
536 struct object_surface *obj_surface,
537 unsigned long binding_table_offset,
538 unsigned long surface_state_offset)
540 struct gen7_surface_state2 *ss;
543 bo = gpe_context->surface_state_binding_table.bo;
547 ss = (struct gen7_surface_state2 *)((char *)bo->virtual + surface_state_offset);
548 gen7_gpe_set_surface2_state(ctx, obj_surface, ss);
549 dri_bo_emit_reloc(bo,
550 I915_GEM_DOMAIN_RENDER, 0,
552 surface_state_offset + offsetof(struct gen7_surface_state2, ss0),
555 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
560 gen7_gpe_set_media_rw_surface_state(VADriverContextP ctx,
561 struct object_surface *obj_surface,
562 struct gen7_surface_state *ss)
565 unsigned int tiling, swizzle;
567 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
568 w = obj_surface->orig_width;
569 h = obj_surface->orig_height;
570 w_pitch = obj_surface->width;
572 memset(ss, 0, sizeof(*ss));
574 ss->ss0.surface_type = I965_SURFACE_2D;
575 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
577 ss->ss1.base_addr = obj_surface->bo->offset;
579 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
580 ss->ss2.height = h - 1;
582 ss->ss3.pitch = w_pitch - 1;
583 gen7_gpe_set_surface_tiling(ss, tiling);
587 gen75_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
588 struct object_surface *obj_surface,
589 struct gen7_surface_state *ss)
592 unsigned int tiling, swizzle;
595 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
596 w = obj_surface->orig_width;
597 w_pitch = obj_surface->width;
599 cbcr_offset = obj_surface->height * obj_surface->width;
600 memset(ss, 0, sizeof(*ss));
602 ss->ss0.surface_type = I965_SURFACE_2D;
603 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
605 ss->ss1.base_addr = obj_surface->bo->offset + cbcr_offset;
607 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
608 ss->ss2.height = (obj_surface->height / 2) -1;
610 ss->ss3.pitch = w_pitch - 1;
611 gen7_gpe_set_surface_tiling(ss, tiling);
615 gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
616 struct i965_gpe_context *gpe_context,
617 struct object_surface *obj_surface,
618 unsigned long binding_table_offset,
619 unsigned long surface_state_offset)
621 struct gen7_surface_state *ss;
624 bo = gpe_context->surface_state_binding_table.bo;
625 dri_bo_map(bo, True);
628 ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
629 gen7_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
630 dri_bo_emit_reloc(bo,
631 I915_GEM_DOMAIN_RENDER, 0,
633 surface_state_offset + offsetof(struct gen7_surface_state, ss1),
636 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
641 gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
642 struct i965_gpe_context *gpe_context,
643 struct object_surface *obj_surface,
644 unsigned long binding_table_offset,
645 unsigned long surface_state_offset)
647 struct gen7_surface_state *ss;
651 assert(obj_surface->fourcc == VA_FOURCC_NV12);
652 bo = gpe_context->surface_state_binding_table.bo;
653 dri_bo_map(bo, True);
656 cbcr_offset = obj_surface->height * obj_surface->width;
657 ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
658 gen75_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
659 dri_bo_emit_reloc(bo,
660 I915_GEM_DOMAIN_RENDER, 0,
662 surface_state_offset + offsetof(struct gen7_surface_state, ss1),
665 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
671 gen7_gpe_set_buffer_surface_state(VADriverContextP ctx,
672 struct i965_buffer_surface *buffer_surface,
673 struct gen7_surface_state *ss)
677 assert(buffer_surface->bo);
678 num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;
680 memset(ss, 0, sizeof(*ss));
682 ss->ss0.surface_type = I965_SURFACE_BUFFER;
684 ss->ss1.base_addr = buffer_surface->bo->offset;
686 ss->ss2.width = ((num_entries - 1) & 0x7f);
687 ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
689 ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
690 ss->ss3.pitch = buffer_surface->pitch - 1;
694 gen7_gpe_buffer_suface_setup(VADriverContextP ctx,
695 struct i965_gpe_context *gpe_context,
696 struct i965_buffer_surface *buffer_surface,
697 unsigned long binding_table_offset,
698 unsigned long surface_state_offset)
700 struct gen7_surface_state *ss;
703 bo = gpe_context->surface_state_binding_table.bo;
707 ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
708 gen7_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
709 dri_bo_emit_reloc(bo,
710 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
712 surface_state_offset + offsetof(struct gen7_surface_state, ss1),
715 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
720 gen8_gpe_set_surface2_state(VADriverContextP ctx,
721 struct object_surface *obj_surface,
722 struct gen8_surface_state2 *ss)
725 unsigned int tiling, swizzle;
727 assert(obj_surface->bo);
728 assert(obj_surface->fourcc == VA_FOURCC_NV12);
730 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
731 w = obj_surface->orig_width;
732 h = obj_surface->orig_height;
733 w_pitch = obj_surface->width;
735 memset(ss, 0, sizeof(*ss));
737 ss->ss6.base_addr = obj_surface->bo->offset;
739 ss->ss1.cbcr_pixel_offset_v_direction = 2;
740 ss->ss1.width = w - 1;
741 ss->ss1.height = h - 1;
743 ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
744 ss->ss2.interleave_chroma = 1;
745 ss->ss2.pitch = w_pitch - 1;
746 ss->ss2.half_pitch_for_chroma = 0;
747 gen8_gpe_set_surface2_tiling(ss, tiling);
748 /* ss3: UV offset for interleave mode */
749 ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
750 ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
754 gen8_gpe_surface2_setup(VADriverContextP ctx,
755 struct i965_gpe_context *gpe_context,
756 struct object_surface *obj_surface,
757 unsigned long binding_table_offset,
758 unsigned long surface_state_offset)
760 struct gen8_surface_state2 *ss;
763 bo = gpe_context->surface_state_binding_table.bo;
767 ss = (struct gen8_surface_state2 *)((char *)bo->virtual + surface_state_offset);
768 gen8_gpe_set_surface2_state(ctx, obj_surface, ss);
769 dri_bo_emit_reloc(bo,
770 I915_GEM_DOMAIN_RENDER, 0,
772 surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
775 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
780 gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx,
781 struct object_surface *obj_surface,
782 struct gen8_surface_state *ss)
785 unsigned int tiling, swizzle;
787 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
788 w = obj_surface->orig_width;
789 h = obj_surface->orig_height;
790 w_pitch = obj_surface->width;
792 memset(ss, 0, sizeof(*ss));
794 ss->ss0.surface_type = I965_SURFACE_2D;
795 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
797 ss->ss8.base_addr = obj_surface->bo->offset;
799 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
800 ss->ss2.height = h - 1;
802 ss->ss3.pitch = w_pitch - 1;
803 gen8_gpe_set_surface_tiling(ss, tiling);
807 gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
808 struct object_surface *obj_surface,
809 struct gen8_surface_state *ss)
812 unsigned int tiling, swizzle;
815 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
816 w = obj_surface->orig_width;
817 h = obj_surface->orig_height;
818 w_pitch = obj_surface->width;
820 cbcr_offset = obj_surface->height * obj_surface->width;
821 memset(ss, 0, sizeof(*ss));
823 ss->ss0.surface_type = I965_SURFACE_2D;
824 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
826 ss->ss8.base_addr = obj_surface->bo->offset + cbcr_offset;
828 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
829 ss->ss2.height = (obj_surface->height / 2) -1;
831 ss->ss3.pitch = w_pitch - 1;
832 gen8_gpe_set_surface_tiling(ss, tiling);
836 gen8_gpe_media_rw_surface_setup(VADriverContextP ctx,
837 struct i965_gpe_context *gpe_context,
838 struct object_surface *obj_surface,
839 unsigned long binding_table_offset,
840 unsigned long surface_state_offset)
842 struct gen8_surface_state *ss;
845 bo = gpe_context->surface_state_binding_table.bo;
846 dri_bo_map(bo, True);
849 ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
850 gen8_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
851 dri_bo_emit_reloc(bo,
852 I915_GEM_DOMAIN_RENDER, 0,
854 surface_state_offset + offsetof(struct gen8_surface_state, ss8),
857 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
862 gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
863 struct i965_gpe_context *gpe_context,
864 struct object_surface *obj_surface,
865 unsigned long binding_table_offset,
866 unsigned long surface_state_offset)
868 struct gen8_surface_state *ss;
872 assert(obj_surface->fourcc == VA_FOURCC_NV12);
873 bo = gpe_context->surface_state_binding_table.bo;
874 dri_bo_map(bo, True);
877 cbcr_offset = obj_surface->height * obj_surface->width;
878 ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
879 gen8_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
880 dri_bo_emit_reloc(bo,
881 I915_GEM_DOMAIN_RENDER, 0,
883 surface_state_offset + offsetof(struct gen8_surface_state, ss8),
886 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
892 gen8_gpe_set_buffer_surface_state(VADriverContextP ctx,
893 struct i965_buffer_surface *buffer_surface,
894 struct gen8_surface_state *ss)
898 assert(buffer_surface->bo);
899 num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;
901 memset(ss, 0, sizeof(*ss));
903 ss->ss0.surface_type = I965_SURFACE_BUFFER;
905 ss->ss8.base_addr = buffer_surface->bo->offset;
907 ss->ss2.width = ((num_entries - 1) & 0x7f);
908 ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
910 ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
911 ss->ss3.pitch = buffer_surface->pitch - 1;
915 gen8_gpe_buffer_suface_setup(VADriverContextP ctx,
916 struct i965_gpe_context *gpe_context,
917 struct i965_buffer_surface *buffer_surface,
918 unsigned long binding_table_offset,
919 unsigned long surface_state_offset)
921 struct gen8_surface_state *ss;
924 bo = gpe_context->surface_state_binding_table.bo;
928 ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
929 gen8_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
930 dri_bo_emit_reloc(bo,
931 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
933 surface_state_offset + offsetof(struct gen8_surface_state, ss8),
936 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
941 gen8_gpe_state_base_address(VADriverContextP ctx,
942 struct i965_gpe_context *gpe_context,
943 struct intel_batchbuffer *batch)
945 BEGIN_BATCH(batch, 16);
947 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 14);
949 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //General State Base Address
953 /*DW4 Surface state base address */
954 OUT_RELOC(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
957 /*DW6. Dynamic state base address */
958 if (gpe_context->dynamic_state.bo)
959 OUT_RELOC(batch, gpe_context->dynamic_state.bo,
960 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
961 0, BASE_ADDRESS_MODIFY);
963 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
967 /*DW8. Indirect Object base address */
968 if (gpe_context->indirect_state.bo)
969 OUT_RELOC(batch, gpe_context->indirect_state.bo,
970 I915_GEM_DOMAIN_SAMPLER,
971 0, BASE_ADDRESS_MODIFY);
973 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
977 /*DW10. Instruct base address */
978 if (gpe_context->instruction_state.bo)
979 OUT_RELOC(batch, gpe_context->instruction_state.bo,
980 I915_GEM_DOMAIN_INSTRUCTION,
981 0, BASE_ADDRESS_MODIFY);
983 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
987 /* DW12. Size limitation */
988 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //General State Access Upper Bound
989 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Dynamic State Access Upper Bound
990 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Indirect Object Access Upper Bound
991 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Instruction Access Upper Bound
994 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //LLC Coherent Base Address
995 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY ); //LLC Coherent Upper Bound
998 ADVANCE_BATCH(batch);
1002 gen8_gpe_vfe_state(VADriverContextP ctx,
1003 struct i965_gpe_context *gpe_context,
1004 struct intel_batchbuffer *batch)
1007 BEGIN_BATCH(batch, 9);
1009 OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
1010 /* Scratch Space Base Pointer and Space */
1011 OUT_BATCH(batch, 0);
1012 OUT_BATCH(batch, 0);
1015 gpe_context->vfe_state.max_num_threads << 16 | /* Maximum Number of Threads */
1016 gpe_context->vfe_state.num_urb_entries << 8 | /* Number of URB Entries */
1017 gpe_context->vfe_state.gpgpu_mode << 2); /* MEDIA Mode */
1018 OUT_BATCH(batch, 0); /* Debug: Object ID */
1020 gpe_context->vfe_state.urb_entry_size << 16 | /* URB Entry Allocation Size */
1021 gpe_context->vfe_state.curbe_allocation_size); /* CURBE Allocation Size */
1023 /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */
1024 OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
1025 OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
1026 OUT_BATCH(batch, gpe_context->vfe_desc7.dword);
1028 ADVANCE_BATCH(batch);
1034 gen8_gpe_curbe_load(VADriverContextP ctx,
1035 struct i965_gpe_context *gpe_context,
1036 struct intel_batchbuffer *batch)
1038 BEGIN_BATCH(batch, 4);
1040 OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
1041 OUT_BATCH(batch, 0);
1042 OUT_BATCH(batch, gpe_context->curbe_size);
1043 OUT_BATCH(batch, gpe_context->curbe_offset);
1045 ADVANCE_BATCH(batch);
1049 gen8_gpe_idrt(VADriverContextP ctx,
1050 struct i965_gpe_context *gpe_context,
1051 struct intel_batchbuffer *batch)
1053 BEGIN_BATCH(batch, 6);
1055 OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
1056 OUT_BATCH(batch, 0);
1058 OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
1059 OUT_BATCH(batch, 0);
1060 OUT_BATCH(batch, gpe_context->idrt_size);
1061 OUT_BATCH(batch, gpe_context->idrt_offset);
1063 ADVANCE_BATCH(batch);
1068 gen8_gpe_pipeline_setup(VADriverContextP ctx,
1069 struct i965_gpe_context *gpe_context,
1070 struct intel_batchbuffer *batch)
1072 intel_batchbuffer_emit_mi_flush(batch);
1074 i965_gpe_select(ctx, gpe_context, batch);
1075 gen8_gpe_state_base_address(ctx, gpe_context, batch);
1076 gen8_gpe_vfe_state(ctx, gpe_context, batch);
1077 gen8_gpe_curbe_load(ctx, gpe_context, batch);
1078 gen8_gpe_idrt(ctx, gpe_context, batch);
1082 gen8_gpe_context_init(VADriverContextP ctx,
1083 struct i965_gpe_context *gpe_context)
1085 struct i965_driver_data *i965 = i965_driver_data(ctx);
1088 unsigned int start_offset, end_offset;
1090 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
1091 bo = dri_bo_alloc(i965->intel.bufmgr,
1092 "surface state & binding table",
1093 gpe_context->surface_state_binding_table.length,
1096 gpe_context->surface_state_binding_table.bo = bo;
1098 bo_size = gpe_context->idrt_size + gpe_context->curbe_size + gpe_context->sampler_size + 192;
1099 dri_bo_unreference(gpe_context->dynamic_state.bo);
1100 bo = dri_bo_alloc(i965->intel.bufmgr,
1101 "surface state & binding table",
1105 gpe_context->dynamic_state.bo = bo;
1106 gpe_context->dynamic_state.bo_size = bo_size;
1109 gpe_context->dynamic_state.end_offset = 0;
1111 /* Constant buffer offset */
1112 start_offset = ALIGN(end_offset, 64);
1113 gpe_context->curbe_offset = start_offset;
1114 end_offset = start_offset + gpe_context->curbe_size;
1116 /* Interface descriptor offset */
1117 start_offset = ALIGN(end_offset, 64);
1118 gpe_context->idrt_offset = start_offset;
1119 end_offset = start_offset + gpe_context->idrt_size;
1121 /* Sampler state offset */
1122 start_offset = ALIGN(end_offset, 64);
1123 gpe_context->sampler_offset = start_offset;
1124 end_offset = start_offset + gpe_context->sampler_size;
1126 /* update the end offset of dynamic_state */
1127 gpe_context->dynamic_state.end_offset = end_offset;
1132 gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
1136 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
1137 gpe_context->surface_state_binding_table.bo = NULL;
1139 dri_bo_unreference(gpe_context->instruction_state.bo);
1140 gpe_context->instruction_state.bo = NULL;
1142 dri_bo_unreference(gpe_context->dynamic_state.bo);
1143 gpe_context->dynamic_state.bo = NULL;
1145 dri_bo_unreference(gpe_context->indirect_state.bo);
1146 gpe_context->indirect_state.bo = NULL;
1152 gen8_gpe_load_kernels(VADriverContextP ctx,
1153 struct i965_gpe_context *gpe_context,
1154 struct i965_kernel *kernel_list,
1155 unsigned int num_kernels)
1157 struct i965_driver_data *i965 = i965_driver_data(ctx);
1159 unsigned int kernel_offset, end_offset;
1160 unsigned char *kernel_ptr;
1161 struct i965_kernel *kernel;
1163 assert(num_kernels <= MAX_GPE_KERNELS);
1164 memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
1165 gpe_context->num_kernels = num_kernels;
1167 kernel_size = num_kernels * 64;
1168 for (i = 0; i < num_kernels; i++) {
1169 kernel = &gpe_context->kernels[i];
1171 kernel_size += kernel->size;
1174 gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
1178 if (gpe_context->instruction_state.bo == NULL) {
1179 WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
1183 assert(gpe_context->instruction_state.bo);
1185 gpe_context->instruction_state.bo_size = kernel_size;
1186 gpe_context->instruction_state.end_offset = 0;
1189 dri_bo_map(gpe_context->instruction_state.bo, 1);
1190 kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual);
1191 for (i = 0; i < num_kernels; i++) {
1192 kernel_offset = ALIGN(end_offset, 64);
1193 kernel = &gpe_context->kernels[i];
1194 kernel->kernel_offset = kernel_offset;
1197 memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
1199 end_offset = kernel_offset + kernel->size;
1203 gpe_context->instruction_state.end_offset = end_offset;
1205 dri_bo_unmap(gpe_context->instruction_state.bo);