/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Xiang Haihao <haihao.xiang@intel.com>
 */
34 #include <va/va_backend.h>
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
40 #include "i965_drv_video.h"
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
/* Post-processing is only supported on Ironlake (Gen5) and Gen6 devices. */
44 #define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) || \
45 IS_GEN6((ctx)->intel.device_id))
/*
 * Gen5 (Ironlake) post-processing kernel binaries.  Each .g4b.gen5 file is
 * generated from the shader sources under shaders/post_processing/ and
 * expands to rows of four 32-bit instruction words.
 * NOTE(review): the closing "};" of each table was lost in extraction.
 */
47 static const uint32_t pp_null_gen5[][4] = {
48 #include "shaders/post_processing/null.g4b.gen5"
51 static const uint32_t pp_nv12_load_save_gen5[][4] = {
52 #include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
55 static const uint32_t pp_nv12_scaling_gen5[][4] = {
56 #include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
59 static const uint32_t pp_nv12_avs_gen5[][4] = {
60 #include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
63 static const uint32_t pp_nv12_dndi_gen5[][4] = {
64 #include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
/*
 * Forward declarations of the per-module initialize hooks referenced by the
 * pp_modules_gen5/pp_modules_gen6 tables below.  Each hook prepares surface
 * states, sampler states and the static/inline kernel parameters for one
 * post-processing operation on the given VA surface.
 */
67 static void pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
68 unsigned short srcw, unsigned short srch,
69 unsigned short destw, unsigned short desth);
70 static void pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
71 unsigned short srcw, unsigned short srch,
72 unsigned short destw, unsigned short desth);
73 static void pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
74 unsigned short srcw, unsigned short srch,
75 unsigned short destw, unsigned short desth);
76 static void pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
77 unsigned short srcw, unsigned short srch,
78 unsigned short destw, unsigned short desth);
79 static void pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
80 unsigned short srcw, unsigned short srch,
81 unsigned short destw, unsigned short desth);
/*
 * Gen5 module table: pairs each kernel binary (and its size) with the
 * matching initialize hook.  NOTE(review): many initializer lines (names,
 * kernel pointers, braces) were dropped by extraction; the visible entries
 * are NULL, NV12 load/save, NV12 scaling, NV12 AVS and NV12 DN/DI.
 */
83 static struct pp_module pp_modules_gen5[] = {
86 "NULL module (for testing)",
98 "NV12 Load & Save module",
100 pp_nv12_load_save_gen5,
101 sizeof(pp_nv12_load_save_gen5),
105 pp_nv12_load_save_initialize,
110 "NV12 Scaling module",
112 pp_nv12_scaling_gen5,
113 sizeof(pp_nv12_scaling_gen5),
117 pp_nv12_scaling_initialize,
125 sizeof(pp_nv12_avs_gen5),
129 pp_nv12_avs_initialize,
137 sizeof(pp_nv12_dndi_gen5),
141 pp_nv12_dndi_initialize,
/*
 * Gen6 post-processing kernel binaries (.g6b), same layout as the Gen5
 * tables above.  NOTE(review): closing "};" lines lost in extraction.
 */
145 static const uint32_t pp_null_gen6[][4] = {
146 #include "shaders/post_processing/null.g6b"
149 static const uint32_t pp_nv12_load_save_gen6[][4] = {
150 #include "shaders/post_processing/nv12_load_save_nv12.g6b"
153 static const uint32_t pp_nv12_scaling_gen6[][4] = {
154 #include "shaders/post_processing/nv12_scaling_nv12.g6b"
157 static const uint32_t pp_nv12_avs_gen6[][4] = {
158 #include "shaders/post_processing/nv12_avs_nv12.g6b"
161 static const uint32_t pp_nv12_dndi_gen6[][4] = {
162 #include "shaders/post_processing/nv12_dndi_nv12.g6b"
/*
 * Gen6 module table, mirroring pp_modules_gen5 but using the .g6b kernels.
 * The initialize hooks are shared between generations.
 * NOTE(review): initializer lines dropped by extraction, as in the Gen5 table.
 */
165 static struct pp_module pp_modules_gen6[] = {
168 "NULL module (for testing)",
171 sizeof(pp_null_gen6),
180 "NV12 Load & Save module",
182 pp_nv12_load_save_gen6,
183 sizeof(pp_nv12_load_save_gen6),
187 pp_nv12_load_save_initialize,
192 "NV12 Scaling module",
194 pp_nv12_scaling_gen6,
195 sizeof(pp_nv12_scaling_gen6),
199 pp_nv12_scaling_initialize,
207 sizeof(pp_nv12_avs_gen6),
211 pp_nv12_avs_initialize,
219 sizeof(pp_nv12_dndi_gen6),
223 pp_nv12_dndi_initialize,
/*
 * Shorthand for the kernel parameter blocks held in the current pp_context:
 * pp_static_parameter goes into CURBE (constant URB), pp_inline_parameter is
 * emitted inline with each MEDIA_OBJECT command.  Both macros assume a local
 * variable named pp_context is in scope at the point of use.
 */
227 #define pp_static_parameter pp_context->pp_static_parameter
228 #define pp_inline_parameter pp_context->pp_inline_parameter
/*
 * Program the SURFACE_STATE ss3 tiling fields from an I915 tiling mode:
 * linear clears both fields; tiled modes set tiled_surface plus the X- or
 * Y-major tile walk.  NOTE(review): the switch framing and case labels for
 * the tiled branches were lost in extraction.
 */
231 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
234 case I915_TILING_NONE:
235 ss->ss3.tiled_surface = 0;
236 ss->ss3.tile_walk = 0;
/* X-major tile walk */
239 ss->ss3.tiled_surface = 1;
240 ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
/* Y-major tile walk */
243 ss->ss3.tiled_surface = 1;
244 ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
/*
 * Same as pp_set_surface_tiling() but for the SURFACE_STATE2 layout used by
 * the sample_8x8 (AVS) surfaces — the tiling fields live in ss2 there.
 * NOTE(review): switch framing and tiled case labels lost in extraction.
 */
250 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
253 case I915_TILING_NONE:
254 ss->ss2.tiled_surface = 0;
255 ss->ss2.tile_walk = 0;
/* X-major tile walk */
258 ss->ss2.tiled_surface = 1;
259 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
/* Y-major tile walk */
262 ss->ss2.tiled_surface = 1;
263 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
/* Surface-state stage of the Ironlake PP state setup.
 * NOTE(review): the function body was lost in extraction — confirm against
 * the full source before relying on its behavior. */
269 ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
/*
 * Fill one interface descriptor in the IDRT for the currently selected PP
 * kernel (pp_context->current_pp): kernel start pointer, CURBE read length
 * (grf 1-4), sampler-state and binding-table pointers.  The three
 * dri_bo_emit_reloc() calls register GPU relocations for the kernel,
 * sampler-state and binding-table buffer addresses written into the
 * descriptor.  NOTE(review): bo declaration, map/unmap and assert lines
 * were dropped by extraction.
 */
275 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
277 struct i965_interface_descriptor *desc;
279 int pp_index = pp_context->current_pp;
281 bo = pp_context->idrt.bo;
285 memset(desc, 0, sizeof(*desc));
286 desc->desc0.grf_reg_blocks = 10;
287 desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
288 desc->desc1.const_urb_entry_read_offset = 0;
289 desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
290 desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
291 desc->desc2.sampler_count = 0;
292 desc->desc3.binding_table_entry_count = 0;
293 desc->desc3.binding_table_pointer =
294 pp_context->binding_table.bo->offset >> 5; /*reloc */
/* Relocation: kernel start pointer in desc0 (delta = grf_reg_blocks bits). */
296 dri_bo_emit_reloc(bo,
297 I915_GEM_DOMAIN_INSTRUCTION, 0,
298 desc->desc0.grf_reg_blocks,
299 offsetof(struct i965_interface_descriptor, desc0),
300 pp_context->pp_modules[pp_index].kernel.bo);
/* Relocation: sampler state table pointer in desc2. */
302 dri_bo_emit_reloc(bo,
303 I915_GEM_DOMAIN_INSTRUCTION, 0,
304 desc->desc2.sampler_count << 2,
305 offsetof(struct i965_interface_descriptor, desc2),
306 pp_context->sampler_state_table.bo);
/* Relocation: binding table pointer in desc3. */
308 dri_bo_emit_reloc(bo,
309 I915_GEM_DOMAIN_INSTRUCTION, 0,
310 desc->desc3.binding_table_entry_count,
311 offsetof(struct i965_interface_descriptor, desc3),
312 pp_context->binding_table.bo);
315 pp_context->idrt.num_interface_descriptors++;
/*
 * Build the binding table: for each of the MAX_PP_SURFACES slots that has a
 * surface-state BO, write that BO's GPU offset into the table and emit a
 * relocation so the kernel driver patches the real address at exec time.
 * NOTE(review): the dri_bo_map/unmap framing and the reloc delta argument
 * were dropped by extraction.
 */
319 ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
321 unsigned int *binding_table;
322 dri_bo *bo = pp_context->binding_table.bo;
327 binding_table = bo->virtual;
328 memset(binding_table, 0, bo->size);
330 for (i = 0; i < MAX_PP_SURFACES; i++) {
331 if (pp_context->surfaces[i].ss_bo) {
/* A surface-state BO implies a backing surface BO. */
332 assert(pp_context->surfaces[i].s_bo);
334 binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
335 dri_bo_emit_reloc(bo,
336 I915_GEM_DOMAIN_INSTRUCTION, 0,
338 i * sizeof(*binding_table),
339 pp_context->surfaces[i].ss_bo);
/*
 * Program the VFE (video front end) state: max threads and URB entry
 * count/size from pp_context->urb, generic VFE mode, and the interface
 * descriptor base address (with a relocation into vfe2).
 * NOTE(review): dri_bo map/unmap lines and one reloc argument were dropped
 * by extraction.
 */
348 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
350 struct i965_vfe_state *vfe_state;
353 bo = pp_context->vfe_state.bo;
356 vfe_state = bo->virtual;
357 memset(vfe_state, 0, sizeof(*vfe_state));
358 vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
359 vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
360 vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
361 vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
362 vfe_state->vfe1.children_present = 0;
363 vfe_state->vfe2.interface_descriptor_base =
364 pp_context->idrt.bo->offset >> 4; /* reloc */
365 dri_bo_emit_reloc(bo,
366 I915_GEM_DOMAIN_INSTRUCTION, 0,
368 offsetof(struct i965_vfe_state, vfe2),
369 pp_context->idrt.bo);
/*
 * Copy the 128-byte static parameter block into the CURBE buffer so the
 * kernel reads it as constants (grf 1-4, per the interface descriptor).
 * The size assert pins the expected layout of pp_static_parameter.
 */
374 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
376 unsigned char *constant_buffer;
378 assert(sizeof(pp_static_parameter) == 128);
379 dri_bo_map(pp_context->curbe.bo, 1);
380 assert(pp_context->curbe.bo->virtual);
381 constant_buffer = pp_context->curbe.bo->virtual;
382 memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
383 dri_bo_unmap(pp_context->curbe.bo);
/*
 * Build all indirect state for one Ironlake PP run, in dependency order:
 * surface states, then the binding table that points at them, then the
 * interface descriptor, VFE state and finally the CURBE constants.
 */
387 ironlake_pp_states_setup(VADriverContextP ctx)
389 struct i965_driver_data *i965 = i965_driver_data(ctx);
390 struct i965_post_processing_context *pp_context = i965->pp_context;
392 ironlake_pp_surface_state(pp_context);
393 ironlake_pp_binding_table(pp_context);
394 ironlake_pp_interface_descriptor_table(pp_context);
395 ironlake_pp_vfe_state(pp_context);
396 ironlake_pp_upload_constants(pp_context);
/* Emit PIPELINE_SELECT to switch the GPU to the media pipeline. */
400 ironlake_pp_pipeline_select(VADriverContextP ctx)
402 struct i965_driver_data *i965 = i965_driver_data(ctx);
403 struct intel_batchbuffer *batch = i965->batch;
405 BEGIN_BATCH(batch, 1);
406 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
407 ADVANCE_BATCH(batch);
/*
 * Emit URB_FENCE splitting the URB between VFE (up to cs_start) and the
 * constant buffer (up to total size).  NOTE(review): the second OUT_BATCH
 * dword of the 3-dword command was dropped by extraction.
 */
411 ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
413 struct i965_driver_data *i965 = i965_driver_data(ctx);
414 struct intel_batchbuffer *batch = i965->batch;
415 unsigned int vfe_fence, cs_fence;
417 vfe_fence = pp_context->urb.cs_start;
418 cs_fence = pp_context->urb.size;
420 BEGIN_BATCH(batch, 3);
421 OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
424 (vfe_fence << UF2_VFE_FENCE_SHIFT) | /* VFE_SIZE */
425 (cs_fence << UF2_CS_FENCE_SHIFT)); /* CS_SIZE */
426 ADVANCE_BATCH(batch);
/*
 * Emit STATE_BASE_ADDRESS with every base set to zero (general state is
 * addressed by absolute BO offsets via relocations, so all bases stay 0
 * with only the modify-enable bit set).
 */
430 ironlake_pp_state_base_address(VADriverContextP ctx)
432 struct i965_driver_data *i965 = i965_driver_data(ctx);
433 struct intel_batchbuffer *batch = i965->batch;
435 BEGIN_BATCH(batch, 8);
436 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
437 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
438 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
439 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
440 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
441 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
442 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
443 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
444 ADVANCE_BATCH(batch);
/*
 * Emit MEDIA_STATE_POINTERS with a relocation to the VFE state BO.
 * NOTE(review): one OUT_BATCH dword of the 3-dword command was dropped by
 * extraction.
 */
448 ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
450 struct i965_driver_data *i965 = i965_driver_data(ctx);
451 struct intel_batchbuffer *batch = i965->batch;
453 BEGIN_BATCH(batch, 3);
454 OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
456 OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
457 ADVANCE_BATCH(batch);
/*
 * Emit CS_URB_STATE: constant-buffer URB entry allocation size and entry
 * count, taken from pp_context->urb.
 */
461 ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
463 struct i965_driver_data *i965 = i965_driver_data(ctx);
464 struct intel_batchbuffer *batch = i965->batch;
466 BEGIN_BATCH(batch, 2);
467 OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
469 ((pp_context->urb.size_cs_entry - 1) << 4) | /* URB Entry Allocation Size */
470 (pp_context->urb.num_cs_entries << 0)); /* Number of URB Entries */
471 ADVANCE_BATCH(batch);
/*
 * Emit CONSTANT_BUFFER pointing at the CURBE BO (which holds the static
 * parameter block uploaded by ironlake_pp_upload_constants); the reloc
 * delta encodes the buffer length in URB entry units.
 */
475 ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
477 struct i965_driver_data *i965 = i965_driver_data(ctx);
478 struct intel_batchbuffer *batch = i965->batch;
480 BEGIN_BATCH(batch, 2);
481 OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
482 OUT_RELOC(batch, pp_context->curbe.bo,
483 I915_GEM_DOMAIN_INSTRUCTION, 0,
484 pp_context->urb.size_cs_entry - 1);
485 ADVANCE_BATCH(batch);
/*
 * Walk the destination in blocks: for each (x, y) step reported by the
 * current module's pp_x_steps/pp_y_steps hooks, let the module update the
 * inline parameters via pp_set_block_parameter (a zero return means "emit
 * this block"), then issue a MEDIA_OBJECT carrying the 64-byte inline
 * parameter block (grf 5-6).  NOTE(review): some OUT_BATCH dwords of the
 * 20-dword command were dropped by extraction.
 */
489 ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
491 struct i965_driver_data *i965 = i965_driver_data(ctx);
492 struct intel_batchbuffer *batch = i965->batch;
493 int x, x_steps, y, y_steps;
495 x_steps = pp_context->pp_x_steps(&pp_context->private_context);
496 y_steps = pp_context->pp_y_steps(&pp_context->private_context);
498 for (y = 0; y < y_steps; y++) {
499 for (x = 0; x < x_steps; x++) {
500 if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
501 BEGIN_BATCH(batch, 20);
502 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
504 OUT_BATCH(batch, 0); /* no indirect data */
507 /* inline data grf 5-6 */
508 assert(sizeof(pp_inline_parameter) == 64);
509 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
511 ADVANCE_BATCH(batch);
/*
 * Emit the complete Ironlake PP command sequence into one atomic batch:
 * flush, pipeline select, state base addresses, state pointers, URB and
 * CS-URB layout, constant buffer, then the per-block object walker.
 */
518 ironlake_pp_pipeline_setup(VADriverContextP ctx)
520 struct i965_driver_data *i965 = i965_driver_data(ctx);
521 struct intel_batchbuffer *batch = i965->batch;
522 struct i965_post_processing_context *pp_context = i965->pp_context;
524 intel_batchbuffer_start_atomic(batch, 0x1000);
525 intel_batchbuffer_emit_mi_flush(batch);
526 ironlake_pp_pipeline_select(ctx);
527 ironlake_pp_state_base_address(ctx);
528 ironlake_pp_state_pointers(ctx, pp_context);
529 ironlake_pp_urb_layout(ctx, pp_context);
530 ironlake_pp_cs_urb_layout(ctx, pp_context);
531 ironlake_pp_constant_buffer(ctx, pp_context);
532 ironlake_pp_object_walker(ctx, pp_context);
533 intel_batchbuffer_end_atomic(batch);
/*
 * NULL-module walker hooks.  NOTE(review): only the signatures survived
 * extraction; the bodies (presumably trivial returns) are missing — confirm
 * against the full source.
 */
537 pp_null_x_steps(void *private_context)
543 pp_null_y_steps(void *private_context)
549 pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
/*
 * NULL (pass-through) module setup: make the surface its own PP output —
 * pp_out_bo aliases the source BO (with an extra reference) and the output
 * dimensions mirror the input — then install the trivial walker hooks.
 * srcw/srch/destw/desth are unused by this module.
 */
555 pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
556 unsigned short srcw, unsigned short srch,
557 unsigned short destw, unsigned short desth)
559 struct i965_driver_data *i965 = i965_driver_data(ctx);
560 struct i965_post_processing_context *pp_context = i965->pp_context;
561 struct object_surface *obj_surface;
564 obj_surface = SURFACE(surface);
/* Drop any previous output BO, then alias the source as the output. */
565 dri_bo_unreference(obj_surface->pp_out_bo);
566 obj_surface->pp_out_bo = obj_surface->bo;
567 dri_bo_reference(obj_surface->pp_out_bo);
568 assert(obj_surface->pp_out_bo);
569 obj_surface->pp_out_width = obj_surface->width;
570 obj_surface->pp_out_height = obj_surface->height;
571 obj_surface->orig_pp_out_width = obj_surface->orig_width;
572 obj_surface->orig_pp_out_height = obj_surface->orig_height;
574 /* private function & data */
575 pp_context->pp_x_steps = pp_null_x_steps;
576 pp_context->pp_y_steps = pp_null_y_steps;
577 pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
/*
 * Load/save walker geometry: one pass horizontally (blocks are counted via
 * grf5.block_count_x instead), dest_h / 8 vertical steps — each MEDIA_OBJECT
 * covers 8 rows.  NOTE(review): the x_steps body was lost in extraction.
 */
581 pp_load_save_x_steps(void *private_context)
587 pp_load_save_y_steps(void *private_context)
589 struct pp_load_save_context *pp_load_save_context = private_context;
591 return pp_load_save_context->dest_h / 8;
/*
 * Per-block inline parameters for load/save: full 16x8 block masks and the
 * destination origin of the current 16x8 block.
 */
595 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
597 pp_inline_parameter.grf5.block_vertical_mask = 0xff;
598 pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
599 pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
600 pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
/*
 * NV12 load/save module setup: copy the source surface into a freshly
 * allocated intermediate BO of the same dimensions.  Binds four surfaces —
 * source Y (index 1), source UV (index 2), destination Y (index 7) and
 * destination UV (index 8) — each with its own SURFACE_STATE BO and a
 * relocation for the base address.  The UV planes start at offset w*h in
 * the NV12 layout.  srcw/srch/destw/desth are unused here; dimensions come
 * from the surface itself.  NOTE(review): several lines (index assignments,
 * bo declarations, map/unmap, some reloc arguments) were dropped by
 * extraction.
 */
606 pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
607 unsigned short srcw, unsigned short srch,
608 unsigned short destw, unsigned short desth)
610 struct i965_driver_data *i965 = i965_driver_data(ctx);
611 struct i965_post_processing_context *pp_context = i965->pp_context;
612 struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
613 struct object_surface *obj_surface;
614 struct i965_surface_state *ss;
618 unsigned int tiling, swizzle;
621 obj_surface = SURFACE(surface);
622 orig_w = obj_surface->orig_width;
623 orig_h = obj_surface->orig_height;
624 w = obj_surface->width;
625 h = obj_surface->height;
/* Replace any previous output with a new intermediate surface BO. */
627 dri_bo_unreference(obj_surface->pp_out_bo);
628 obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
629 "intermediate surface",
632 assert(obj_surface->pp_out_bo);
633 obj_surface->pp_out_width = obj_surface->width;
634 obj_surface->pp_out_height = obj_surface->height;
635 obj_surface->orig_pp_out_width = obj_surface->orig_width;
636 obj_surface->orig_pp_out_height = obj_surface->orig_height;
638 /* source Y surface index 1 */
639 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
642 pp_context->surfaces[index].s_bo = obj_surface->bo;
643 dri_bo_reference(pp_context->surfaces[index].s_bo);
644 bo = dri_bo_alloc(i965->intel.bufmgr,
646 sizeof(struct i965_surface_state),
649 pp_context->surfaces[index].ss_bo = bo;
650 dri_bo_map(bo, True);
653 memset(ss, 0, sizeof(*ss));
654 ss->ss0.surface_type = I965_SURFACE_2D;
655 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
656 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
/* Width in 4-byte units: 4 Y pixels per element. */
657 ss->ss2.width = orig_w / 4 - 1;
658 ss->ss2.height = orig_h - 1;
659 ss->ss3.pitch = w - 1;
660 pp_set_surface_tiling(ss, tiling);
661 dri_bo_emit_reloc(bo,
662 I915_GEM_DOMAIN_RENDER,
665 offsetof(struct i965_surface_state, ss1),
666 pp_context->surfaces[index].s_bo);
669 /* source UV surface index 2 */
671 pp_context->surfaces[index].s_bo = obj_surface->bo;
672 dri_bo_reference(pp_context->surfaces[index].s_bo);
673 bo = dri_bo_alloc(i965->intel.bufmgr,
675 sizeof(struct i965_surface_state),
678 pp_context->surfaces[index].ss_bo = bo;
679 dri_bo_map(bo, True);
682 memset(ss, 0, sizeof(*ss));
683 ss->ss0.surface_type = I965_SURFACE_2D;
684 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
/* UV plane follows the Y plane in NV12: offset w * h. */
685 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
686 ss->ss2.width = orig_w / 4 - 1;
687 ss->ss2.height = orig_h / 2 - 1;
688 ss->ss3.pitch = w - 1;
689 pp_set_surface_tiling(ss, tiling);
690 dri_bo_emit_reloc(bo,
691 I915_GEM_DOMAIN_RENDER,
694 offsetof(struct i965_surface_state, ss1),
695 pp_context->surfaces[index].s_bo);
698 /* destination Y surface index 7 */
700 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
701 dri_bo_reference(pp_context->surfaces[index].s_bo);
702 bo = dri_bo_alloc(i965->intel.bufmgr,
704 sizeof(struct i965_surface_state),
707 pp_context->surfaces[index].ss_bo = bo;
708 dri_bo_map(bo, True);
711 memset(ss, 0, sizeof(*ss));
712 ss->ss0.surface_type = I965_SURFACE_2D;
713 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
714 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
715 ss->ss2.width = orig_w / 4 - 1;
716 ss->ss2.height = orig_h - 1;
717 ss->ss3.pitch = w - 1;
/* Destination is written by the GPU: render read AND write domains. */
718 dri_bo_emit_reloc(bo,
719 I915_GEM_DOMAIN_RENDER,
720 I915_GEM_DOMAIN_RENDER,
722 offsetof(struct i965_surface_state, ss1),
723 pp_context->surfaces[index].s_bo);
726 /* destination UV surface index 8 */
728 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
729 dri_bo_reference(pp_context->surfaces[index].s_bo);
730 bo = dri_bo_alloc(i965->intel.bufmgr,
732 sizeof(struct i965_surface_state),
735 pp_context->surfaces[index].ss_bo = bo;
736 dri_bo_map(bo, True);
739 memset(ss, 0, sizeof(*ss));
740 ss->ss0.surface_type = I965_SURFACE_2D;
741 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
742 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
743 ss->ss2.width = orig_w / 4 - 1;
744 ss->ss2.height = orig_h / 2 - 1;
745 ss->ss3.pitch = w - 1;
746 dri_bo_emit_reloc(bo,
747 I915_GEM_DOMAIN_RENDER,
748 I915_GEM_DOMAIN_RENDER,
750 offsetof(struct i965_surface_state, ss1),
751 pp_context->surfaces[index].s_bo);
754 /* private function & data */
755 pp_context->pp_x_steps = pp_load_save_x_steps;
756 pp_context->pp_y_steps = pp_load_save_y_steps;
757 pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
758 pp_load_save_context->dest_h = h;
759 pp_load_save_context->dest_w = w;
/* Horizontal extent as a count of 16-pixel blocks. */
761 pp_inline_parameter.grf5.block_count_x = w / 16; /* 1 x N */
762 pp_inline_parameter.grf5.number_blocks = w / 16;
/*
 * Scaling walker geometry: dest_h / 8 vertical steps (8 output rows per
 * MEDIA_OBJECT).  NOTE(review): the x_steps body was lost in extraction.
 */
766 pp_scaling_x_steps(void *private_context)
772 pp_scaling_y_steps(void *private_context)
774 struct pp_scaling_context *pp_scaling_context = private_context;
776 return pp_scaling_context->dest_h / 8;
/*
 * Per-block parameters for bilinear scaling: the source origin of the
 * current 16x8 destination block is the destination origin scaled by the
 * normalized per-pixel stepping (x from the inline grf5, y from the static
 * grf1 parameter).
 */
780 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
782 float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
783 float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
785 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16;
786 pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
787 pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
788 pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
/*
 * NV12 bilinear scaling module setup.  Allocates a 16-aligned destw x desth
 * intermediate output BO, binds source Y/UV (indices 1/2, read with linear
 * samplers 1/2) and destination Y/UV (indices 7/8, render write), then
 * programs the walker hooks and the normalized x/y scaling steps
 * (1 / output dimension).  srcw/srch are unused; the source size comes from
 * the surface.  NOTE(review): index assignments, some declarations,
 * map/unmap and reloc-argument lines were dropped by extraction.
 */
794 pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
795 unsigned short srcw, unsigned short srch,
796 unsigned short destw, unsigned short desth)
798 struct i965_driver_data *i965 = i965_driver_data(ctx);
799 struct i965_post_processing_context *pp_context = i965->pp_context;
800 struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
801 struct object_surface *obj_surface;
802 struct i965_sampler_state *sampler_state;
803 struct i965_surface_state *ss;
808 int pp_out_w, pp_out_h;
809 int orig_pp_out_w, orig_pp_out_h;
810 unsigned int tiling, swizzle;
813 obj_surface = SURFACE(surface);
814 orig_w = obj_surface->orig_width;
815 orig_h = obj_surface->orig_height;
816 w = obj_surface->width;
817 h = obj_surface->height;
/* Output dimensions: requested size, padded up to 16 for the walker. */
819 orig_pp_out_w = destw;
820 orig_pp_out_h = desth;
821 pp_out_w = ALIGN(orig_pp_out_w, 16);
822 pp_out_h = ALIGN(orig_pp_out_h, 16);
823 dri_bo_unreference(obj_surface->pp_out_bo);
824 obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
825 "intermediate surface",
826 SIZE_YUV420(pp_out_w, pp_out_h),
828 assert(obj_surface->pp_out_bo);
829 obj_surface->orig_pp_out_width = orig_pp_out_w;
830 obj_surface->orig_pp_out_height = orig_pp_out_h;
831 obj_surface->pp_out_width = pp_out_w;
832 obj_surface->pp_out_height = pp_out_h;
834 /* source Y surface index 1 */
835 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
838 pp_context->surfaces[index].s_bo = obj_surface->bo;
839 dri_bo_reference(pp_context->surfaces[index].s_bo);
840 bo = dri_bo_alloc(i965->intel.bufmgr,
842 sizeof(struct i965_surface_state),
845 pp_context->surfaces[index].ss_bo = bo;
846 dri_bo_map(bo, True);
849 memset(ss, 0, sizeof(*ss));
850 ss->ss0.surface_type = I965_SURFACE_2D;
851 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
852 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
/* Sampled per-pixel (R8): width in pixels, unlike the /4 write surfaces. */
853 ss->ss2.width = orig_w - 1;
854 ss->ss2.height = orig_h - 1;
855 ss->ss3.pitch = w - 1;
856 pp_set_surface_tiling(ss, tiling);
857 dri_bo_emit_reloc(bo,
858 I915_GEM_DOMAIN_RENDER,
861 offsetof(struct i965_surface_state, ss1),
862 pp_context->surfaces[index].s_bo);
865 /* source UV surface index 2 */
867 pp_context->surfaces[index].s_bo = obj_surface->bo;
868 dri_bo_reference(pp_context->surfaces[index].s_bo);
869 bo = dri_bo_alloc(i965->intel.bufmgr,
871 sizeof(struct i965_surface_state),
874 pp_context->surfaces[index].ss_bo = bo;
875 dri_bo_map(bo, True);
878 memset(ss, 0, sizeof(*ss));
879 ss->ss0.surface_type = I965_SURFACE_2D;
880 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
/* NV12: interleaved UV plane starts at w * h. */
881 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
882 ss->ss2.width = orig_w / 2 - 1;
883 ss->ss2.height = orig_h / 2 - 1;
884 ss->ss3.pitch = w - 1;
885 pp_set_surface_tiling(ss, tiling);
886 dri_bo_emit_reloc(bo,
887 I915_GEM_DOMAIN_RENDER,
890 offsetof(struct i965_surface_state, ss1),
891 pp_context->surfaces[index].s_bo);
894 /* destination Y surface index 7 */
896 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
897 dri_bo_reference(pp_context->surfaces[index].s_bo);
898 bo = dri_bo_alloc(i965->intel.bufmgr,
900 sizeof(struct i965_surface_state),
903 pp_context->surfaces[index].ss_bo = bo;
904 dri_bo_map(bo, True);
907 memset(ss, 0, sizeof(*ss));
908 ss->ss0.surface_type = I965_SURFACE_2D;
909 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
910 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
911 ss->ss2.width = pp_out_w / 4 - 1;
912 ss->ss2.height = pp_out_h - 1;
913 ss->ss3.pitch = pp_out_w - 1;
/* Destination: render read AND write domains. */
914 dri_bo_emit_reloc(bo,
915 I915_GEM_DOMAIN_RENDER,
916 I915_GEM_DOMAIN_RENDER,
918 offsetof(struct i965_surface_state, ss1),
919 pp_context->surfaces[index].s_bo);
922 /* destination UV surface index 8 */
924 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
925 dri_bo_reference(pp_context->surfaces[index].s_bo);
926 bo = dri_bo_alloc(i965->intel.bufmgr,
928 sizeof(struct i965_surface_state),
931 pp_context->surfaces[index].ss_bo = bo;
932 dri_bo_map(bo, True);
935 memset(ss, 0, sizeof(*ss));
936 ss->ss0.surface_type = I965_SURFACE_2D;
937 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
938 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
939 ss->ss2.width = pp_out_w / 4 - 1;
940 ss->ss2.height = pp_out_h / 2 - 1;
941 ss->ss3.pitch = pp_out_w - 1;
942 dri_bo_emit_reloc(bo,
943 I915_GEM_DOMAIN_RENDER,
944 I915_GEM_DOMAIN_RENDER,
946 offsetof(struct i965_surface_state, ss1),
947 pp_context->surfaces[index].s_bo);
/* Bilinear, clamped samplers for the Y (index 1) and UV (index 2) reads. */
951 dri_bo_map(pp_context->sampler_state_table.bo, True);
952 assert(pp_context->sampler_state_table.bo->virtual);
953 sampler_state = pp_context->sampler_state_table.bo->virtual;
955 /* SIMD16 Y index 1 */
956 sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
957 sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
958 sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
959 sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
960 sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
962 /* SIMD16 UV index 2 */
963 sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
964 sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
965 sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
966 sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
967 sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
969 dri_bo_unmap(pp_context->sampler_state_table.bo);
971 /* private function & data */
972 pp_context->pp_x_steps = pp_scaling_x_steps;
973 pp_context->pp_y_steps = pp_scaling_y_steps;
974 pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
976 pp_scaling_context->dest_w = pp_out_w;
977 pp_scaling_context->dest_h = pp_out_h;
/* Normalized stepping: one output pixel advances 1/pp_out_{w,h} in source
 * texture coordinates. */
979 pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
980 pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
981 pp_inline_parameter.grf5.block_count_x = pp_out_w / 16; /* 1 x N */
982 pp_inline_parameter.grf5.number_blocks = pp_out_w / 16;
983 pp_inline_parameter.grf5.block_vertical_mask = 0xff;
984 pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
/*
 * AVS walker geometry: dest_w / 16 horizontal steps (16 output pixels per
 * block).  NOTE(review): the y_steps body was lost in extraction.
 */
988 pp_avs_x_steps(void *private_context)
990 struct pp_avs_context *pp_avs_context = private_context;
992 return pp_avs_context->dest_w / 16;
996 pp_avs_y_steps(void *private_context)
/*
 * Per-block parameters for AVS (adaptive video scaling) with non-linear
 * anamorphic stretching.  tmp_w is the width that would preserve the source
 * aspect ratio at dest_h, rounded up to 16.
 *
 * If tmp_w >= dest_w the content is cropped/centered: a constant x step of
 * 1/tmp_w, zero step delta, and (for x == 0) a horizontal origin offset
 * centering the dest_w window inside tmp_w; later columns accumulate the
 * origin from the previous block's step and delta.
 *
 * Otherwise the image is stretched non-linearly: the left nls_left and
 * right nls_right block columns get a quadratic ramp (per-block step a with
 * per-pixel delta ±b, derived from f = fraction of source mapped to each
 * stretched side) while the middle columns scale linearly at 1/tmp_w.  The
 * repeated origin-accumulation expression advances the source origin by the
 * previous block's 16 steps plus the triangular delta sum (16*15/2).
 *
 * The tail sets the vertical source origin from the static y step and the
 * destination origin of the current 16x8 block.
 *
 * NOTE(review): branch framing lines (several if/else and variable
 * declarations such as f, n0-derived nls_left) were dropped by extraction;
 * verify the exact control flow against the full source.
 */
1002 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1004 struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1005 float src_x_steping, src_y_steping, video_step_delta;
1006 int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1008 if (tmp_w >= pp_avs_context->dest_w) {
1009 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1010 pp_inline_parameter.grf6.video_step_delta = 0;
/* Center the dest_w window within the aspect-correct tmp_w extent. */
1013 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2;
1015 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1016 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1017 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1018 16 * 15 * video_step_delta / 2;
1021 int n0, n1, n2, nls_left, nls_right;
1022 int factor_a = 5, factor_b = 4;
/* Split the extra width between left and right non-linear sections. */
1025 n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1026 n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1027 n2 = tmp_w / (16 * factor_a);
1029 nls_right = n1 + n2;
1030 f = (float) n2 * 16 / tmp_w;
1033 pp_inline_parameter.grf6.video_step_delta = 0.0;
/* Degenerate case: plain linear scaling across dest_w. */
1036 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1037 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1039 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1040 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1041 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1042 16 * 15 * video_step_delta / 2;
1046 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1047 float a = f / (nls_left * 16 * factor_b);
1048 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
/* Left ramp: step grows by +b per pixel. */
1050 pp_inline_parameter.grf6.video_step_delta = b;
1053 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1054 pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
1056 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1057 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1058 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1059 16 * 15 * video_step_delta / 2;
1060 pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
1062 } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1063 /* scale the center linearly */
1064 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1065 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1066 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1067 16 * 15 * video_step_delta / 2;
1068 pp_inline_parameter.grf6.video_step_delta = 0.0;
1069 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
/* Right ramp: mirror of the left one, step shrinks by -b per pixel. */
1071 float a = f / (nls_right * 16 * factor_b);
1072 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1074 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1075 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1076 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1077 16 * 15 * video_step_delta / 2;
1078 pp_inline_parameter.grf6.video_step_delta = -b;
1080 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1081 pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16 - 1) * b;
1083 pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
/* Vertical origin and destination block position, as in the other modules. */
1088 src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
1089 pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
1090 pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1091 pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
/*
 * pp_nv12_avs_initialize - set up an AVS (adaptive video scaler) pass for an
 * NV12 surface.  Allocates and fills the source Y/UV SURFACE_STATE2 objects,
 * an intermediate destination surface plus its Y/UV SURFACE_STATEs, the
 * sampler_8x8 AVS/IEF coefficient state for both planes, and seeds the
 * static/inline kernel parameters with the normalized scaling steps.
 *
 * srcw/srch are currently unused here; scaling is derived from the surface
 * dimensions and destw/desth.
 */
1097 pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1098 unsigned short srcw, unsigned short srch,
1099 unsigned short destw, unsigned short desth)
1101 struct i965_driver_data *i965 = i965_driver_data(ctx);
1102 struct i965_post_processing_context *pp_context = i965->pp_context;
1103 struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1104 struct object_surface *obj_surface;
1105 struct i965_surface_state *ss;
1106 struct i965_sampler_8x8 *sampler_8x8;
1107 struct i965_sampler_8x8_state *sampler_8x8_state;
1108 struct i965_surface_state2 *ss_8x8;
1109 dri_bo *bo, *src_bo;
1113 int pp_out_w, pp_out_h;
1114 int orig_pp_out_w, orig_pp_out_h;
1115 unsigned int tiling, swizzle;
1118 obj_surface = SURFACE(surface);
/*
 * Pick the input buffer: either the intermediate pp_out surface produced by
 * a previous pass, or the surface's own backing buffer.
 * NOTE(review): the selection appears to be conditional (presumably on the
 * `input` argument); the condition itself is elided from this listing —
 * confirm against the full source.
 */
1121 orig_w = obj_surface->orig_pp_out_width;
1122 orig_h = obj_surface->orig_pp_out_height;
1123 w = obj_surface->pp_out_width;
1124 h = obj_surface->pp_out_height;
1125 src_bo = obj_surface->pp_out_bo;
1127 orig_w = obj_surface->orig_width;
1128 orig_h = obj_surface->orig_height;
1129 w = obj_surface->width;
1130 h = obj_surface->height;
1131 src_bo = obj_surface->bo;
1135 dri_bo_get_tiling(src_bo, &tiling, &swizzle);
1137 /* source Y surface index 1 */
1139 pp_context->surfaces[index].s_bo = src_bo;
1140 dri_bo_reference(pp_context->surfaces[index].s_bo);
1141 bo = dri_bo_alloc(i965->intel.bufmgr,
1142 "Y surface state for sample_8x8",
1143 sizeof(struct i965_surface_state2),
1146 pp_context->surfaces[index].ss_bo = bo;
1147 dri_bo_map(bo, True);
1148 assert(bo->virtual);
1149 ss_8x8 = bo->virtual;
1150 memset(ss_8x8, 0, sizeof(*ss_8x8));
/* base address is patched at exec time by the reloc emitted below */
1151 ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1152 ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1153 ss_8x8->ss1.width = orig_w - 1;
1154 ss_8x8->ss1.height = orig_h - 1;
1155 ss_8x8->ss2.half_pitch_for_chroma = 0;
1156 ss_8x8->ss2.pitch = w - 1;
1157 ss_8x8->ss2.interleave_chroma = 0;
1158 ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM;
1159 ss_8x8->ss3.x_offset_for_cb = 0;
1160 ss_8x8->ss3.y_offset_for_cb = 0;
1161 pp_set_surface2_tiling(ss_8x8, tiling);
1162 dri_bo_emit_reloc(bo,
1163 I915_GEM_DOMAIN_RENDER,
1166 offsetof(struct i965_surface_state2, ss0),
1167 pp_context->surfaces[index].s_bo);
1170 /* source UV surface index 2 */
1172 pp_context->surfaces[index].s_bo = src_bo;
1173 dri_bo_reference(pp_context->surfaces[index].s_bo);
1174 bo = dri_bo_alloc(i965->intel.bufmgr,
1175 "UV surface state for sample_8x8",
1176 sizeof(struct i965_surface_state2),
1179 pp_context->surfaces[index].ss_bo = bo;
1180 dri_bo_map(bo, True);
1181 assert(bo->virtual);
1182 ss_8x8 = bo->virtual;
1183 memset(ss_8x8, 0, sizeof(*ss_8x8));
/* NV12: interleaved CbCr plane starts w*h bytes after the Y plane */
1184 ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + w * h;
1185 ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1186 ss_8x8->ss1.width = orig_w - 1;
1187 ss_8x8->ss1.height = orig_h - 1;
1188 ss_8x8->ss2.half_pitch_for_chroma = 0;
1189 ss_8x8->ss2.pitch = w - 1;
1190 ss_8x8->ss2.interleave_chroma = 1;
1191 ss_8x8->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1192 ss_8x8->ss3.x_offset_for_cb = 0;
1193 ss_8x8->ss3.y_offset_for_cb = 0;
1194 pp_set_surface2_tiling(ss_8x8, tiling);
1195 dri_bo_emit_reloc(bo,
1196 I915_GEM_DOMAIN_RENDER,
1199 offsetof(struct i965_surface_state2, ss0),
1200 pp_context->surfaces[index].s_bo);
/*
 * Allocate the intermediate destination surface, with dimensions rounded
 * up to a multiple of 16 so the 16x8-block walker covers it exactly.
 */
1203 orig_pp_out_w = destw;
1204 orig_pp_out_h = desth;
1205 pp_out_w = ALIGN(orig_pp_out_w, 16);
1206 pp_out_h = ALIGN(orig_pp_out_h, 16);
1207 dri_bo_unreference(obj_surface->pp_out_bo);
1208 obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1209 "intermediate surface",
1210 SIZE_YUV420(pp_out_w, pp_out_h),
1212 assert(obj_surface->pp_out_bo);
1213 obj_surface->orig_pp_out_width = orig_pp_out_w;
1214 obj_surface->orig_pp_out_height = orig_pp_out_h;
1215 obj_surface->pp_out_width = pp_out_w;
1216 obj_surface->pp_out_height = pp_out_h;
1218 /* destination Y surface index 7 */
1220 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1221 dri_bo_reference(pp_context->surfaces[index].s_bo);
1222 bo = dri_bo_alloc(i965->intel.bufmgr,
1224 sizeof(struct i965_surface_state),
1227 pp_context->surfaces[index].ss_bo = bo;
1228 dri_bo_map(bo, True);
1229 assert(bo->virtual);
1231 memset(ss, 0, sizeof(*ss));
1232 ss->ss0.surface_type = I965_SURFACE_2D;
1233 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1234 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
/* width is programmed in 4-pixel units for the media write path */
1235 ss->ss2.width = pp_out_w / 4 - 1;
1236 ss->ss2.height = pp_out_h - 1;
1237 ss->ss3.pitch = pp_out_w - 1;
1238 dri_bo_emit_reloc(bo,
1239 I915_GEM_DOMAIN_RENDER,
1240 I915_GEM_DOMAIN_RENDER,
1242 offsetof(struct i965_surface_state, ss1),
1243 pp_context->surfaces[index].s_bo);
1246 /* destination UV surface index 8 */
1248 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1249 dri_bo_reference(pp_context->surfaces[index].s_bo);
1250 bo = dri_bo_alloc(i965->intel.bufmgr,
1252 sizeof(struct i965_surface_state),
1255 pp_context->surfaces[index].ss_bo = bo;
1256 dri_bo_map(bo, True);
1257 assert(bo->virtual);
1259 memset(ss, 0, sizeof(*ss));
1260 ss->ss0.surface_type = I965_SURFACE_2D;
1261 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
/* UV plane sits pp_out_w*pp_out_h bytes past the Y plane */
1262 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
1263 ss->ss2.width = pp_out_w / 4 - 1;
1264 ss->ss2.height = pp_out_h / 2 - 1;
1265 ss->ss3.pitch = pp_out_w - 1;
1266 dri_bo_emit_reloc(bo,
1267 I915_GEM_DOMAIN_RENDER,
1268 I915_GEM_DOMAIN_RENDER,
1269 pp_out_w * pp_out_h,
1270 offsetof(struct i965_surface_state, ss1),
1271 pp_context->surfaces[index].s_bo);
1274 /* sampler 8x8 state */
/* Y-plane sampler_8x8 coefficient table: adaptive filtering enabled */
1275 dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1276 assert(pp_context->sampler_state_table.bo_8x8->virtual);
1277 assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1278 sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1279 memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1280 sampler_8x8_state->dw136.default_sharpness_level = 0;
1281 sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1282 sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1283 sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1284 dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1287 dri_bo_map(pp_context->sampler_state_table.bo, True);
1288 assert(pp_context->sampler_state_table.bo->virtual);
1289 assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1290 sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1292 /* sample_8x8 Y index 1 */
/*
 * Y sampler: 8-tap adaptive AVS filter with IEF (image enhancement
 * filter) detail mode.  The dw2..dw14 values are fixed IEF tuning
 * constants (edge thresholds, gain, piece-wise-linear curve points,
 * biases and slopes) — identical for the UV sampler below.
 */
1294 memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1295 sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1296 sampler_8x8[index].dw0.ief_bypass = 0;
1297 sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1298 sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
/* coefficient table pointer in 32-byte units; reloc patched below */
1299 sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1300 sampler_8x8[index].dw2.global_noise_estimation = 22;
1301 sampler_8x8[index].dw2.strong_edge_threshold = 8;
1302 sampler_8x8[index].dw2.weak_edge_threshold = 1;
1303 sampler_8x8[index].dw3.strong_edge_weight = 7;
1304 sampler_8x8[index].dw3.regular_weight = 2;
1305 sampler_8x8[index].dw3.non_edge_weight = 0;
1306 sampler_8x8[index].dw3.gain_factor = 40;
1307 sampler_8x8[index].dw4.steepness_boost = 0;
1308 sampler_8x8[index].dw4.steepness_threshold = 0;
1309 sampler_8x8[index].dw4.mr_boost = 0;
1310 sampler_8x8[index].dw4.mr_threshold = 5;
1311 sampler_8x8[index].dw5.pwl1_point_1 = 4;
1312 sampler_8x8[index].dw5.pwl1_point_2 = 12;
1313 sampler_8x8[index].dw5.pwl1_point_3 = 16;
1314 sampler_8x8[index].dw5.pwl1_point_4 = 26;
1315 sampler_8x8[index].dw6.pwl1_point_5 = 40;
1316 sampler_8x8[index].dw6.pwl1_point_6 = 160;
1317 sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1318 sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1319 sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1320 sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1321 sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1322 sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1323 sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1324 sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1325 sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1326 sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1327 sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1328 sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1329 sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1330 sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1331 sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1332 sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1333 sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1334 sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1335 sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1336 sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1337 sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1338 sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1339 sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1340 sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1341 sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1342 sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1343 sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1344 sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1345 sampler_8x8[index].dw13.limiter_boost = 0;
1346 sampler_8x8[index].dw13.minimum_limiter = 10;
1347 sampler_8x8[index].dw13.maximum_limiter = 11;
1348 sampler_8x8[index].dw14.clip_limiter = 130;
1349 dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1350 I915_GEM_DOMAIN_RENDER,
1353 sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1354 pp_context->sampler_state_table.bo_8x8);
/* UV-plane sampler_8x8 coefficient table: adaptive filtering disabled */
1356 dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
1357 assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
1358 assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1359 sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
1360 memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1361 sampler_8x8_state->dw136.default_sharpness_level = 0;
1362 sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
1363 sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1364 sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1365 dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
1367 /* sample_8x8 UV index 2 */
/*
 * UV sampler: same IEF tuning as the Y sampler, but nearest filtering
 * (chroma is subsampled) and pointing at the UV coefficient table.
 */
1369 memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1370 sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
1371 sampler_8x8[index].dw0.ief_bypass = 0;
1372 sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1373 sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1374 sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
1375 sampler_8x8[index].dw2.global_noise_estimation = 22;
1376 sampler_8x8[index].dw2.strong_edge_threshold = 8;
1377 sampler_8x8[index].dw2.weak_edge_threshold = 1;
1378 sampler_8x8[index].dw3.strong_edge_weight = 7;
1379 sampler_8x8[index].dw3.regular_weight = 2;
1380 sampler_8x8[index].dw3.non_edge_weight = 0;
1381 sampler_8x8[index].dw3.gain_factor = 40;
1382 sampler_8x8[index].dw4.steepness_boost = 0;
1383 sampler_8x8[index].dw4.steepness_threshold = 0;
1384 sampler_8x8[index].dw4.mr_boost = 0;
1385 sampler_8x8[index].dw4.mr_threshold = 5;
1386 sampler_8x8[index].dw5.pwl1_point_1 = 4;
1387 sampler_8x8[index].dw5.pwl1_point_2 = 12;
1388 sampler_8x8[index].dw5.pwl1_point_3 = 16;
1389 sampler_8x8[index].dw5.pwl1_point_4 = 26;
1390 sampler_8x8[index].dw6.pwl1_point_5 = 40;
1391 sampler_8x8[index].dw6.pwl1_point_6 = 160;
1392 sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1393 sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1394 sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1395 sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1396 sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1397 sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1398 sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1399 sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1400 sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1401 sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1402 sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1403 sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1404 sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1405 sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1406 sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1407 sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1408 sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1409 sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1410 sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1411 sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1412 sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1413 sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1414 sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1415 sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1416 sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1417 sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1418 sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1419 sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1420 sampler_8x8[index].dw13.limiter_boost = 0;
1421 sampler_8x8[index].dw13.minimum_limiter = 10;
1422 sampler_8x8[index].dw13.maximum_limiter = 11;
1423 sampler_8x8[index].dw14.clip_limiter = 130;
1424 dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1425 I915_GEM_DOMAIN_RENDER,
1428 sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1429 pp_context->sampler_state_table.bo_8x8_uv);
1431 dri_bo_unmap(pp_context->sampler_state_table.bo);
1433 /* private function & data */
1434 pp_context->pp_x_steps = pp_avs_x_steps;
1435 pp_context->pp_y_steps = pp_avs_y_steps;
1436 pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1438 pp_avs_context->dest_w = pp_out_w;
1439 pp_avs_context->dest_h = pp_out_h;
1440 pp_avs_context->src_w = w;
1441 pp_avs_context->src_h = h;
/*
 * Seed kernel parameters: NLAS (non-linear anamorphic scaling) on, scaling
 * steps normalized to the destination size, walker configured for columns
 * of 16x8 blocks (block_count_x = 1, number_blocks = rows).
 */
1443 pp_static_parameter.grf4.r4_2.avs.nlas = 1;
1444 pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
1445 pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
1446 pp_inline_parameter.grf5.block_count_x = 1; /* M x 1 */
1447 pp_inline_parameter.grf5.number_blocks = pp_out_h / 8;
1448 pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1449 pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1450 pp_inline_parameter.grf6.video_step_delta = 0.0;
/* pp_dndi_x_steps - number of horizontal walker steps for the DN/DI pass. */
1454 pp_dndi_x_steps(void *private_context)
/*
 * pp_dndi_y_steps - number of vertical walker steps for the DN/DI pass:
 * one step per 4-line destination block row.
 */
1460 pp_dndi_y_steps(void *private_context)
1462 struct pp_dndi_context *pp_dndi_context = private_context;
1464 return pp_dndi_context->dest_h / 4;
/*
 * pp_dndi_set_block_parameter - program the inline parameters for one DN/DI
 * walker block: blocks are 16 pixels wide and 4 lines tall.
 */
1468 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1470 pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1471 pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
/*
 * pp_nv12_dndi_initialize - set up a denoise/deinterlace (DN/DI) pass for an
 * NV12 surface.  Allocates the STMM (spatial-temporal motion measure)
 * scratch buffer and an intermediate output surface, builds source and
 * destination surface states, programs the DN/DI sampler state, and seeds
 * the static/inline kernel parameters.
 *
 * srcw/srch/destw/desth are currently unused here; the pass operates on the
 * surface's own dimensions.
 */
1477 void pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1478 unsigned short srcw, unsigned short srch,
1479 unsigned short destw, unsigned short desth)
1481 struct i965_driver_data *i965 = i965_driver_data(ctx);
1482 struct i965_post_processing_context *pp_context = i965->pp_context;
1483 struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1484 struct object_surface *obj_surface;
1485 struct i965_surface_state *ss;
1486 struct i965_surface_state2 *ss_dndi;
1487 struct i965_sampler_dndi *sampler_dndi;
1492 unsigned int tiling, swizzle;
1495 obj_surface = SURFACE(surface);
1496 orig_w = obj_surface->orig_width;
1497 orig_h = obj_surface->orig_height;
1498 w = obj_surface->width;
1499 h = obj_surface->height;
/* STMM buffer is allocated once and persists across frames */
1501 if (pp_context->stmm.bo == NULL) {
1502 pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1506 assert(pp_context->stmm.bo);
1509 dri_bo_unreference(obj_surface->pp_out_bo);
1510 obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1511 "intermediate surface",
1514 assert(obj_surface->pp_out_bo);
1515 obj_surface->orig_pp_out_width = orig_w;
1516 obj_surface->orig_pp_out_height = orig_h;
1517 obj_surface->pp_out_width = w;
1518 obj_surface->pp_out_height = h;
1520 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1521 /* source UV surface index 2 */
1523 pp_context->surfaces[index].s_bo = obj_surface->bo;
1524 dri_bo_reference(pp_context->surfaces[index].s_bo);
1525 bo = dri_bo_alloc(i965->intel.bufmgr,
1527 sizeof(struct i965_surface_state),
1530 pp_context->surfaces[index].ss_bo = bo;
1531 dri_bo_map(bo, True);
1532 assert(bo->virtual);
1534 memset(ss, 0, sizeof(*ss));
1535 ss->ss0.surface_type = I965_SURFACE_2D;
1536 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
/* NV12: interleaved CbCr plane starts w*h bytes after the Y plane */
1537 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1538 ss->ss2.width = orig_w / 4 - 1;
1539 ss->ss2.height = orig_h / 2 - 1;
1540 ss->ss3.pitch = w - 1;
1541 pp_set_surface_tiling(ss, tiling);
1542 dri_bo_emit_reloc(bo,
1543 I915_GEM_DOMAIN_RENDER,
1546 offsetof(struct i965_surface_state, ss1),
1547 pp_context->surfaces[index].s_bo);
1550 /* source YUV surface index 4 */
1552 pp_context->surfaces[index].s_bo = obj_surface->bo;
1553 dri_bo_reference(pp_context->surfaces[index].s_bo);
1554 bo = dri_bo_alloc(i965->intel.bufmgr,
1555 "YUV surface state for deinterlace ",
1556 sizeof(struct i965_surface_state2),
1559 pp_context->surfaces[index].ss_bo = bo;
1560 dri_bo_map(bo, True);
1561 assert(bo->virtual);
1562 ss_dndi = bo->virtual;
1563 memset(ss_dndi, 0, sizeof(*ss_dndi));
1564 ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
/*
 * NOTE(review): cbcr_pixel_offset_v_direction is assigned twice (0 here,
 * then 1 three lines below — the second assignment wins).  Likewise
 * half_pitch_for_chroma is assigned 0 twice.  Harmless but one of each
 * pair should be removed; confirm intended v_direction value.
 */
1565 ss_dndi->ss1.cbcr_pixel_offset_v_direction = 0;
1566 ss_dndi->ss1.width = w - 1;
1567 ss_dndi->ss1.height = h - 1;
1568 ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1;
1569 ss_dndi->ss2.half_pitch_for_chroma = 0;
1570 ss_dndi->ss2.pitch = w - 1;
1571 ss_dndi->ss2.interleave_chroma = 1;
1572 ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1573 ss_dndi->ss2.half_pitch_for_chroma = 0;
1574 ss_dndi->ss2.tiled_surface = 0;
1575 ss_dndi->ss3.x_offset_for_cb = 0;
/* CbCr plane begins h rows below the Y plane origin */
1576 ss_dndi->ss3.y_offset_for_cb = h;
1577 pp_set_surface2_tiling(ss_dndi, tiling);
1578 dri_bo_emit_reloc(bo,
1579 I915_GEM_DOMAIN_RENDER,
1582 offsetof(struct i965_surface_state2, ss0),
1583 pp_context->surfaces[index].s_bo);
1586 /* source STMM surface index 20 */
1588 pp_context->surfaces[index].s_bo = pp_context->stmm.bo;
1589 dri_bo_reference(pp_context->surfaces[index].s_bo);
1590 bo = dri_bo_alloc(i965->intel.bufmgr,
1591 "STMM surface state for deinterlace ",
1592 sizeof(struct i965_surface_state2),
1595 pp_context->surfaces[index].ss_bo = bo;
1596 dri_bo_map(bo, True);
1597 assert(bo->virtual);
1599 memset(ss, 0, sizeof(*ss));
1600 ss->ss0.surface_type = I965_SURFACE_2D;
1601 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1602 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1603 ss->ss2.width = w - 1;
1604 ss->ss2.height = h - 1;
1605 ss->ss3.pitch = w - 1;
1606 dri_bo_emit_reloc(bo,
1607 I915_GEM_DOMAIN_RENDER,
1608 I915_GEM_DOMAIN_RENDER,
1610 offsetof(struct i965_surface_state, ss1),
1611 pp_context->surfaces[index].s_bo);
1614 /* destination Y surface index 7 */
1616 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1617 dri_bo_reference(pp_context->surfaces[index].s_bo);
1618 bo = dri_bo_alloc(i965->intel.bufmgr,
1620 sizeof(struct i965_surface_state),
1623 pp_context->surfaces[index].ss_bo = bo;
1624 dri_bo_map(bo, True);
1625 assert(bo->virtual);
1627 memset(ss, 0, sizeof(*ss));
1628 ss->ss0.surface_type = I965_SURFACE_2D;
1629 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1630 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
/* width in 4-pixel units for the media write path */
1631 ss->ss2.width = w / 4 - 1;
1632 ss->ss2.height = h - 1;
1633 ss->ss3.pitch = w - 1;
1634 dri_bo_emit_reloc(bo,
1635 I915_GEM_DOMAIN_RENDER,
1636 I915_GEM_DOMAIN_RENDER,
1638 offsetof(struct i965_surface_state, ss1),
1639 pp_context->surfaces[index].s_bo);
1642 /* destination UV surface index 8 */
1644 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1645 dri_bo_reference(pp_context->surfaces[index].s_bo);
1646 bo = dri_bo_alloc(i965->intel.bufmgr,
1648 sizeof(struct i965_surface_state),
1651 pp_context->surfaces[index].ss_bo = bo;
1652 dri_bo_map(bo, True);
1653 assert(bo->virtual);
1655 memset(ss, 0, sizeof(*ss));
1656 ss->ss0.surface_type = I965_SURFACE_2D;
1657 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1658 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1659 ss->ss2.width = w / 4 - 1;
1660 ss->ss2.height = h / 2 - 1;
1661 ss->ss3.pitch = w - 1;
1662 dri_bo_emit_reloc(bo,
1663 I915_GEM_DOMAIN_RENDER,
1664 I915_GEM_DOMAIN_RENDER,
1666 offsetof(struct i965_surface_state, ss1),
1667 pp_context->surfaces[index].s_bo);
/* DN/DI sampler state: denoise thresholds, STMM blending, FMD tuning */
1671 dri_bo_map(pp_context->sampler_state_table.bo, True);
1672 assert(pp_context->sampler_state_table.bo->virtual);
1673 assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1674 sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1676 /* sample dndi index 1 */
1678 sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1679 sampler_dndi[index].dw0.denoise_history_delta = 8; // 0-15, default is 8
1680 sampler_dndi[index].dw0.denoise_maximum_history = 128; // 128-240
1681 sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1683 sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1684 sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1685 sampler_dndi[index].dw1.stmm_c2 = 0;
1686 sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1687 sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1689 sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15; // 0-31
1690 sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7; // 0-15
1691 sampler_dndi[index].dw2.denoise_edge_threshold = 7; // 0-15
1692 sampler_dndi[index].dw2.good_neighbor_threshold = 7; // 0-63
1694 sampler_dndi[index].dw3.maximum_stmm = 128;
1695 sampler_dndi[index].dw3.multipler_for_vecm = 2;
1696 sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1697 sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1698 sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1700 sampler_dndi[index].dw4.sdi_delta = 8;
1701 sampler_dndi[index].dw4.sdi_threshold = 128;
1702 sampler_dndi[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift
1703 sampler_dndi[index].dw4.stmm_shift_up = 0;
1704 sampler_dndi[index].dw4.stmm_shift_down = 0;
1705 sampler_dndi[index].dw4.minimum_stmm = 0;
1707 sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1708 sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1709 sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1710 sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
/* enable both denoise and full deinterlace; top field first, frame 0 */
1712 sampler_dndi[index].dw6.dn_enable = 1;
1713 sampler_dndi[index].dw6.di_enable = 1;
1714 sampler_dndi[index].dw6.di_partial = 0;
1715 sampler_dndi[index].dw6.dndi_top_first = 1;
1716 sampler_dndi[index].dw6.dndi_stream_id = 1;
1717 sampler_dndi[index].dw6.dndi_first_frame = 1;
1718 sampler_dndi[index].dw6.progressive_dn = 0;
1719 sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1720 sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1721 sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1723 sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1724 sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1725 sampler_dndi[index].dw7.vdi_walker_enable = 0;
1726 sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1728 dri_bo_unmap(pp_context->sampler_state_table.bo);
1730 /* private function & data */
1731 pp_context->pp_x_steps = pp_dndi_x_steps;
1732 pp_context->pp_y_steps = pp_dndi_y_steps;
1733 pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
/* field name "picth" is a typo inherited from the struct declaration */
1735 pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1736 pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1737 pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1738 pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
/* walker: one row of w/16 blocks per vertical step (1 x N) */
1740 pp_inline_parameter.grf5.block_count_x = w / 16; /* 1 x N */
1741 pp_inline_parameter.grf5.number_blocks = w / 16;
1742 pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1743 pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1745 pp_dndi_context->dest_w = w;
1746 pp_dndi_context->dest_h = h;
/*
 * ironlake_pp_initialize - per-pass setup for Ironlake post-processing:
 * (re)allocates the GPU buffers used by one pass (CURBE, binding table,
 * interface descriptors, sampler state tables, VFE state), clears the
 * surface slots and kernel parameters, then calls the selected pp module's
 * own initialize hook.
 */
1750 ironlake_pp_initialize(VADriverContextP ctx,
1751 VASurfaceID surface,
1755 unsigned short srcw,
1756 unsigned short srch,
1759 unsigned short destw,
1760 unsigned short desth,
1763 struct i965_driver_data *i965 = i965_driver_data(ctx);
1764 struct i965_post_processing_context *pp_context = i965->pp_context;
1765 struct pp_module *pp_module;
1769 dri_bo_unreference(pp_context->curbe.bo);
1770 bo = dri_bo_alloc(i965->intel.bufmgr,
1775 pp_context->curbe.bo = bo;
1777 dri_bo_unreference(pp_context->binding_table.bo);
1778 bo = dri_bo_alloc(i965->intel.bufmgr,
1780 sizeof(unsigned int),
1783 pp_context->binding_table.bo = bo;
1785 dri_bo_unreference(pp_context->idrt.bo);
1786 bo = dri_bo_alloc(i965->intel.bufmgr,
1787 "interface discriptor",
1788 sizeof(struct i965_interface_descriptor),
1791 pp_context->idrt.bo = bo;
1792 pp_context->idrt.num_interface_descriptors = 0;
1794 dri_bo_unreference(pp_context->sampler_state_table.bo);
1795 bo = dri_bo_alloc(i965->intel.bufmgr,
1796 "sampler state table",
/* sampler table is zeroed now; module initialize fills it later */
1800 dri_bo_map(bo, True);
1801 memset(bo->virtual, 0, bo->size);
1803 pp_context->sampler_state_table.bo = bo;
1805 dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1806 bo = dri_bo_alloc(i965->intel.bufmgr,
1807 "sampler 8x8 state ",
1811 pp_context->sampler_state_table.bo_8x8 = bo;
1813 dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1814 bo = dri_bo_alloc(i965->intel.bufmgr,
1815 "sampler 8x8 state ",
1819 pp_context->sampler_state_table.bo_8x8_uv = bo;
1821 dri_bo_unreference(pp_context->vfe_state.bo);
1822 bo = dri_bo_alloc(i965->intel.bufmgr,
1824 sizeof(struct i965_vfe_state),
1827 pp_context->vfe_state.bo = bo;
/* drop any surface state left over from the previous pass */
1829 for (i = 0; i < MAX_PP_SURFACES; i++) {
1830 dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1831 pp_context->surfaces[i].ss_bo = NULL;
1833 dri_bo_unreference(pp_context->surfaces[i].s_bo);
1834 pp_context->surfaces[i].s_bo = NULL;
1837 memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1838 memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1839 assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1840 pp_context->current_pp = pp_index;
1841 pp_module = &pp_context->pp_modules[pp_index];
1843 if (pp_module->initialize)
1844 pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
/*
 * ironlake_post_processing - run one complete post-processing pass on
 * Ironlake: initialize buffers and the selected module, emit the indirect
 * state, then build and submit the media pipeline commands.
 */
1848 ironlake_post_processing(VADriverContextP ctx,
1849 VASurfaceID surface,
1853 unsigned short srcw,
1854 unsigned short srch,
1857 unsigned short destw,
1858 unsigned short desth,
1861 ironlake_pp_initialize(ctx, surface, input,
1862 srcx, srcy, srcw, srch,
1863 destx, desty, destw, desth,
1865 ironlake_pp_states_setup(ctx);
1866 ironlake_pp_pipeline_setup(ctx);
/*
 * gen6_pp_initialize - per-pass setup for Sandy Bridge (Gen6)
 * post-processing.  Mirrors ironlake_pp_initialize except that the
 * interface descriptor buffer uses the Gen6 descriptor layout
 * (struct gen6_interface_descriptor_data).
 */
1870 gen6_pp_initialize(VADriverContextP ctx,
1871 VASurfaceID surface,
1875 unsigned short srcw,
1876 unsigned short srch,
1879 unsigned short destw,
1880 unsigned short desth,
1883 struct i965_driver_data *i965 = i965_driver_data(ctx);
1884 struct i965_post_processing_context *pp_context = i965->pp_context;
1885 struct pp_module *pp_module;
1889 dri_bo_unreference(pp_context->curbe.bo);
1890 bo = dri_bo_alloc(i965->intel.bufmgr,
1895 pp_context->curbe.bo = bo;
1897 dri_bo_unreference(pp_context->binding_table.bo);
1898 bo = dri_bo_alloc(i965->intel.bufmgr,
1900 sizeof(unsigned int),
1903 pp_context->binding_table.bo = bo;
1905 dri_bo_unreference(pp_context->idrt.bo);
1906 bo = dri_bo_alloc(i965->intel.bufmgr,
1907 "interface discriptor",
1908 sizeof(struct gen6_interface_descriptor_data),
1911 pp_context->idrt.bo = bo;
1912 pp_context->idrt.num_interface_descriptors = 0;
1914 dri_bo_unreference(pp_context->sampler_state_table.bo);
1915 bo = dri_bo_alloc(i965->intel.bufmgr,
1916 "sampler state table",
/* sampler table is zeroed now; module initialize fills it later */
1920 dri_bo_map(bo, True);
1921 memset(bo->virtual, 0, bo->size);
1923 pp_context->sampler_state_table.bo = bo;
1925 dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1926 bo = dri_bo_alloc(i965->intel.bufmgr,
1927 "sampler 8x8 state ",
1931 pp_context->sampler_state_table.bo_8x8 = bo;
1933 dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1934 bo = dri_bo_alloc(i965->intel.bufmgr,
1935 "sampler 8x8 state ",
1939 pp_context->sampler_state_table.bo_8x8_uv = bo;
1941 dri_bo_unreference(pp_context->vfe_state.bo);
1942 bo = dri_bo_alloc(i965->intel.bufmgr,
1944 sizeof(struct i965_vfe_state),
1947 pp_context->vfe_state.bo = bo;
/* drop any surface state left over from the previous pass */
1949 for (i = 0; i < MAX_PP_SURFACES; i++) {
1950 dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1951 pp_context->surfaces[i].ss_bo = NULL;
1953 dri_bo_unreference(pp_context->surfaces[i].s_bo);
1954 pp_context->surfaces[i].s_bo = NULL;
1957 memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1958 memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1959 assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1960 pp_context->current_pp = pp_index;
1961 pp_module = &pp_context->pp_modules[pp_index];
1963 if (pp_module->initialize)
1964 pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
/*
 * gen6_pp_binding_table - fill the binding table with the GPU offsets of
 * every allocated surface state, emitting a relocation per entry so the
 * kernel patches the final addresses at execbuffer time.
 */
1968 gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
1970 unsigned int *binding_table;
1971 dri_bo *bo = pp_context->binding_table.bo;
1975 assert(bo->virtual);
1976 binding_table = bo->virtual;
1977 memset(binding_table, 0, bo->size);
1979 for (i = 0; i < MAX_PP_SURFACES; i++) {
1980 if (pp_context->surfaces[i].ss_bo) {
1981 assert(pp_context->surfaces[i].s_bo);
/* presumed offset now; reloc below fixes it up at submission */
1983 binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
1984 dri_bo_emit_reloc(bo,
1985 I915_GEM_DOMAIN_INSTRUCTION, 0,
1987 i * sizeof(*binding_table),
1988 pp_context->surfaces[i].ss_bo);
/*
 * gen6_pp_interface_descriptor_table - write one Gen6 interface descriptor
 * for the currently selected pp kernel: kernel start pointer, sampler state
 * and binding table pointers (all with relocations), plus CURBE read setup
 * (4 GRFs of constants).
 */
1997 gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1999 struct gen6_interface_descriptor_data *desc;
2001 int pp_index = pp_context->current_pp;
2003 bo = pp_context->idrt.bo;
2004 dri_bo_map(bo, True);
2005 assert(bo->virtual);
2007 memset(desc, 0, sizeof(*desc));
/* pointers below are in 64-/32-byte units as required by the descriptor */
2008 desc->desc0.kernel_start_pointer =
2009 pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
2010 desc->desc1.single_program_flow = 1;
2011 desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
2012 desc->desc2.sampler_count = 1; /* 1 - 4 samplers used */
2013 desc->desc2.sampler_state_pointer =
2014 pp_context->sampler_state_table.bo->offset >> 5;
2015 desc->desc3.binding_table_entry_count = 0;
2016 desc->desc3.binding_table_pointer =
2017 pp_context->binding_table.bo->offset >> 5; /*reloc */
2018 desc->desc4.constant_urb_entry_read_offset = 0;
2019 desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
2021 dri_bo_emit_reloc(bo,
2022 I915_GEM_DOMAIN_INSTRUCTION, 0,
2024 offsetof(struct gen6_interface_descriptor_data, desc0),
2025 pp_context->pp_modules[pp_index].kernel.bo);
2027 dri_bo_emit_reloc(bo,
2028 I915_GEM_DOMAIN_INSTRUCTION, 0,
2029 desc->desc2.sampler_count << 2,
2030 offsetof(struct gen6_interface_descriptor_data, desc2),
2031 pp_context->sampler_state_table.bo);
2033 dri_bo_emit_reloc(bo,
2034 I915_GEM_DOMAIN_INSTRUCTION, 0,
2035 desc->desc3.binding_table_entry_count,
2036 offsetof(struct gen6_interface_descriptor_data, desc3),
2037 pp_context->binding_table.bo);
2040 pp_context->idrt.num_interface_descriptors++;
/*
 * gen6_pp_upload_constants - copy the 128-byte static parameter block
 * (pp_static_parameter) into the CURBE buffer read by the kernel.
 */
2044 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
2046 unsigned char *constant_buffer;
2048 assert(sizeof(pp_static_parameter) == 128);
2049 dri_bo_map(pp_context->curbe.bo, 1);
2050 assert(pp_context->curbe.bo->virtual);
2051 constant_buffer = pp_context->curbe.bo->virtual;
2052 memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
2053 dri_bo_unmap(pp_context->curbe.bo);
/*
 * gen6_pp_states_setup - emit all indirect state for a Gen6 pp pass:
 * binding table, interface descriptor, and CURBE constants.
 */
2057 gen6_pp_states_setup(VADriverContextP ctx)
2059 struct i965_driver_data *i965 = i965_driver_data(ctx);
2060 struct i965_post_processing_context *pp_context = i965->pp_context;
2062 gen6_pp_binding_table(pp_context);
2063 gen6_pp_interface_descriptor_table(pp_context);
2064 gen6_pp_upload_constants(pp_context);
/* gen6_pp_pipeline_select - switch the command streamer to the media pipeline. */
2068 gen6_pp_pipeline_select(VADriverContextP ctx)
2070 struct i965_driver_data *i965 = i965_driver_data(ctx);
2071 struct intel_batchbuffer *batch = i965->batch;
2073 BEGIN_BATCH(batch, 1);
2074 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
2075 ADVANCE_BATCH(batch);
/*
 * gen6_pp_state_base_address - emit STATE_BASE_ADDRESS with every base set
 * to zero (MODIFY enabled), so all state pointers are absolute GTT offsets.
 */
2079 gen6_pp_state_base_address(VADriverContextP ctx)
2081 struct i965_driver_data *i965 = i965_driver_data(ctx);
2082 struct intel_batchbuffer *batch = i965->batch;
2084 BEGIN_BATCH(batch, 10);
2085 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2086 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2087 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2088 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2089 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2090 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2091 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2092 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2093 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2094 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2095 ADVANCE_BATCH(batch);
/* Program MEDIA_VFE_STATE: thread/URB configuration for the media Video
 * Front End, taken from the urb settings chosen at context init. */
2099 gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2101     struct i965_driver_data *i965 = i965_driver_data(ctx);
2102     struct intel_batchbuffer *batch = i965->batch;
2104     BEGIN_BATCH(batch, 8);
2105     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
2106     OUT_BATCH(batch, 0);
/* NOTE(review): bits 31:16 look like max-threads-minus-one and bits 15:8
 * like the number of URB entries — confirm against the GEN6 PRM. */
2108              (pp_context->urb.num_vfe_entries - 1) << 16 |
2109              pp_context->urb.num_vfe_entries << 8);
2110     OUT_BATCH(batch, 0);
2112              (pp_context->urb.size_vfe_entry * 2) << 16 |     /* in 256 bits unit */
2113              (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1)); /* in 256 bits unit */
2114     OUT_BATCH(batch, 0);
2115     OUT_BATCH(batch, 0);
2116     OUT_BATCH(batch, 0);
2117     ADVANCE_BATCH(batch);
/* Emit MEDIA_CURBE_LOAD pointing the hardware at the constant buffer BO
 * uploaded by gen6_pp_upload_constants(). */
2121 gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2123     struct i965_driver_data *i965 = i965_driver_data(ctx);
2124     struct intel_batchbuffer *batch = i965->batch;
/* The programmed CURBE length must fit inside the allocated BO. */
2126     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);
2128     BEGIN_BATCH(batch, 4);
2129     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
2130     OUT_BATCH(batch, 0);
/* NOTE(review): the * 512 scale factor for the CURBE length field should
 * be cross-checked with the MEDIA_CURBE_LOAD encoding in the PRM. */
2132               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
/* Relocation: the final dword is patched with the BO's GPU address. */
2134               pp_context->curbe.bo,
2135               I915_GEM_DOMAIN_INSTRUCTION, 0,
2137     ADVANCE_BATCH(batch);
/* Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD: tells the hardware where the
 * interface descriptor table (kernel entry points, sampler/binding table
 * pointers) lives and how large it is. */
2141 gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2143     struct i965_driver_data *i965 = i965_driver_data(ctx);
2144     struct intel_batchbuffer *batch = i965->batch;
2146     BEGIN_BATCH(batch, 4);
2147     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
2148     OUT_BATCH(batch, 0);
/* Total table size in bytes: one gen6_interface_descriptor_data per
 * descriptor appended by gen6_pp_interface_descriptor_table(). */
2150               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
/* Relocation to the descriptor-table BO. */
2152               pp_context->idrt.bo,
2153               I915_GEM_DOMAIN_INSTRUCTION, 0,
2155     ADVANCE_BATCH(batch);
/* Walk the output surface in blocks: for every (x, y) step emit one
 * MEDIA_OBJECT command carrying the per-block inline parameters, spawning
 * one media thread per block. */
2159 gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2161     struct i965_driver_data *i965 = i965_driver_data(ctx);
2162     struct intel_batchbuffer *batch = i965->batch;
2163     int x, x_steps, y, y_steps;
/* Step counts come from the per-operation callbacks installed by the
 * active pp module (scaling, AVS, deinterlace, ...). */
2165     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
2166     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
2168     for (y = 0; y < y_steps; y++) {
2169         for (x = 0; x < x_steps; x++) {
/* pp_set_block_parameter() fills pp_inline_parameter for this block and
 * returns 0 on success; non-zero blocks are skipped. */
2170             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
/* 22 dwords total: 6-dword MEDIA_OBJECT header + 16 dwords inline data. */
2171                 BEGIN_BATCH(batch, 22);
2172                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20);
2173                 OUT_BATCH(batch, 0);
2174                 OUT_BATCH(batch, 0); /* no indirect data */
2175                 OUT_BATCH(batch, 0);
2176                 OUT_BATCH(batch, 0); /* scoreboard */
2177                 OUT_BATCH(batch, 0);
2179                 /* inline data grf 5-6 */
/* The 64-byte inline parameter block lands in GRF registers 5-6. */
2180                 assert(sizeof(pp_inline_parameter) == 64);
2181                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
2183                 ADVANCE_BATCH(batch);
/* Build and submit the full GEN6 post-processing batch as one atomic
 * sequence: flush, pipeline select, state loads, then the object walker. */
2190 gen6_pp_pipeline_setup(VADriverContextP ctx)
2192     struct i965_driver_data *i965 = i965_driver_data(ctx);
2193     struct intel_batchbuffer *batch = i965->batch;
2194     struct i965_post_processing_context *pp_context = i965->pp_context;
/* Reserve 0x1000 bytes so the whole command sequence fits in one batch
 * without an implicit flush splitting it. */
2196     intel_batchbuffer_start_atomic(batch, 0x1000);
2197     intel_batchbuffer_emit_mi_flush(batch);
2198     gen6_pp_pipeline_select(ctx);
2199     gen6_pp_curbe_load(ctx, pp_context);
2200     gen6_interface_descriptor_load(ctx, pp_context);
2201     gen6_pp_state_base_address(ctx);
2202     gen6_pp_vfe_state(ctx, pp_context);
2203     gen6_pp_object_walker(ctx, pp_context);
2204     intel_batchbuffer_end_atomic(batch);
/* GEN6 (Sandy Bridge) entry point for one post-processing operation:
 * initialize the per-operation context, build indirect state, then emit
 * and submit the media pipeline batch. */
2208 gen6_post_processing(VADriverContextP ctx,
2209                      VASurfaceID surface,
2213                      unsigned short srcw,
2214                      unsigned short srch,
2217                      unsigned short destw,
2218                      unsigned short desth,
2221     gen6_pp_initialize(ctx, surface, input,
2222                        srcx, srcy, srcw, srch,
2223                        destx, desty, destw, desth,
2225     gen6_pp_states_setup(ctx);
2226     gen6_pp_pipeline_setup(ctx);
/* Dispatch a single post-processing operation to the generation-specific
 * implementation: GEN6 (Sandy Bridge) or Ironlake (GEN5). */
2230 i965_post_processing_internal(VADriverContextP ctx,
2231                               VASurfaceID surface,
2235                               unsigned short srcw,
2236                               unsigned short srch,
2239                               unsigned short destw,
2240                               unsigned short desth,
2243     struct i965_driver_data *i965 = i965_driver_data(ctx);
2245     if (IS_GEN6(i965->intel.device_id))
2246         gen6_post_processing(ctx, surface, input,
2247                              srcx, srcy, srcw, srch,
2248                              destx, desty, destw, desth,
/* Non-GEN6 falls through to the Ironlake path (gated by HAS_PP at the
 * public entry, so only GEN5/GEN6 ever reach here). */
2251         ironlake_post_processing(ctx, surface, input,
2252                                  srcx, srcy, srcw, srch,
2253                                  destx, desty, destw, desth,
/* Public post-processing entry: runs the deinterlacing and/or AVS scaling
 * stages requested in `flag` on the given surface. */
2258 i965_post_processing(VADriverContextP ctx,
2259                      VASurfaceID surface,
2262                      unsigned short srcw,
2263                      unsigned short srch,
2266                      unsigned short destw,
2267                      unsigned short desth,
2270     struct i965_driver_data *i965 = i965_driver_data(ctx);
2273     /* Currently only support post processing for NV12 surface */
2274     if (i965->render_state.interleaved_uv) {
/* Tracks whether a later stage should read the intermediate output of an
 * earlier stage instead of the original surface. */
2275         int internal_input = 0;
/* Stage 1: optional deinterlacing (DNDI). */
2277         if (flag & I965_PP_FLAG_DEINTERLACING) {
2278             i965_post_processing_internal(ctx, surface, internal_input,
2279                                           srcx, srcy, srcw, srch,
2280                                           destx, desty, destw, desth,
/* Stage 2: optional adaptive video scaling (AVS). */
2285         if (flag & I965_PP_FLAG_AVS) {
2286             i965_post_processing_internal(ctx, surface, internal_input,
2287                                           srcx, srcy, srcw, srch,
2288                                           destx, desty, destw, desth,
/* Release every GPU buffer object owned by the post-processing context
 * and free the context itself. Each pointer is NULLed after unreference
 * to guard against accidental reuse. */
2296 i965_post_processing_terminate(VADriverContextP ctx)
2298     struct i965_driver_data *i965 = i965_driver_data(ctx);
2299     struct i965_post_processing_context *pp_context = i965->pp_context;
/* Constant (CURBE) buffer. */
2304         dri_bo_unreference(pp_context->curbe.bo);
2305         pp_context->curbe.bo = NULL;
/* Per-surface state: surface-state BO and the surface data BO. */
2307         for (i = 0; i < MAX_PP_SURFACES; i++) {
2308             dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2309             pp_context->surfaces[i].ss_bo = NULL;
2311             dri_bo_unreference(pp_context->surfaces[i].s_bo);
2312             pp_context->surfaces[i].s_bo = NULL;
/* Sampler state tables: plain samplers plus the 8x8 (AVS) luma/chroma
 * tables. */
2315         dri_bo_unreference(pp_context->sampler_state_table.bo);
2316         pp_context->sampler_state_table.bo = NULL;
2318         dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2319         pp_context->sampler_state_table.bo_8x8 = NULL;
2321         dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2322         pp_context->sampler_state_table.bo_8x8_uv = NULL;
/* Binding table and interface descriptor table. */
2324         dri_bo_unreference(pp_context->binding_table.bo);
2325         pp_context->binding_table.bo = NULL;
2327         dri_bo_unreference(pp_context->idrt.bo);
2328         pp_context->idrt.bo = NULL;
2329         pp_context->idrt.num_interface_descriptors = 0;
/* VFE state and the spatial-temporal motion measure (STMM) buffer used
 * by deinterlacing. */
2331         dri_bo_unreference(pp_context->vfe_state.bo);
2332         pp_context->vfe_state.bo = NULL;
2334         dri_bo_unreference(pp_context->stmm.bo);
2335         pp_context->stmm.bo = NULL;
/* Kernel binaries for every post-processing module. */
2337         for (i = 0; i < NUM_PP_MODULES; i++) {
2338             struct pp_module *pp_module = &pp_context->pp_modules[i];
2340             dri_bo_unreference(pp_module->kernel.bo);
2341             pp_module->kernel.bo = NULL;
2347     i965->pp_context = NULL;
2354 i965_post_processing_init(VADriverContextP ctx)
2356 struct i965_driver_data *i965 = i965_driver_data(ctx);
2357 struct i965_post_processing_context *pp_context = i965->pp_context;
2361 if (pp_context == NULL) {
2362 pp_context = calloc(1, sizeof(*pp_context));
2363 i965->pp_context = pp_context;
2365 pp_context->urb.size = URB_SIZE((&i965->intel));
2366 pp_context->urb.num_vfe_entries = 32;
2367 pp_context->urb.size_vfe_entry = 1; /* in 512 bits unit */
2368 pp_context->urb.num_cs_entries = 1;
2369 pp_context->urb.size_cs_entry = 2; /* in 512 bits unit */
2370 pp_context->urb.vfe_start = 0;
2371 pp_context->urb.cs_start = pp_context->urb.vfe_start +
2372 pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2373 assert(pp_context->urb.cs_start +
2374 pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2376 assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
2377 assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2379 if (IS_GEN6(i965->intel.device_id))
2380 memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
2381 else if (IS_IRONLAKE(i965->intel.device_id))
2382 memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
2384 for (i = 0; i < NUM_PP_MODULES; i++) {
2385 struct pp_module *pp_module = &pp_context->pp_modules[i];
2386 dri_bo_unreference(pp_module->kernel.bo);
2387 pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
2388 pp_module->kernel.name,
2389 pp_module->kernel.size,
2391 assert(pp_module->kernel.bo);
2392 dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);