2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
34 #include <va/va_backend.h>
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
43 #include "i965_drv_video.h"
/* Post-processing is available only on Ironlake (Gen5) and Gen6 devices. */
45 #define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) || \
46 IS_GEN6((ctx)->intel.device_id))
/* NOTE(review): fragment of the pp_module descriptor -- the enclosing
 * struct declaration and its other members are missing from this extract.
 * bin: kernel binary as 4-dword (128-bit) instruction words.
 * initialize: per-module setup hook run before the kernel is dispatched. */
53 unsigned int (*bin)[4];
58 void (*initialize)(VADriverContextP ctx, VASurfaceID surface, int input,
59 unsigned short srcw, unsigned short srch,
60 unsigned short destw, unsigned short desth);
/* Gen5 (Ironlake) post-processing kernels, included as pre-assembled
 * arrays of 4-dword instruction words.
 * NOTE(review): the closing "};" of each array is not visible here --
 * lines appear to be missing from this extract. */
63 static uint32_t pp_null_gen5[][4] = {
64 #include "shaders/post_processing/null.g4b.gen5"
67 static uint32_t pp_nv12_load_save_gen5[][4] = {
68 #include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
71 static uint32_t pp_nv12_scaling_gen5[][4] = {
72 #include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
75 static uint32_t pp_nv12_avs_gen5[][4] = {
76 #include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
79 static uint32_t pp_nv12_dndi_gen5[][4] = {
80 #include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
/* Forward declarations of the per-module initialize hooks referenced by
 * the pp_modules_gen5/pp_modules_gen6 tables below. */
83 static void pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
84 unsigned short srcw, unsigned short srch,
85 unsigned short destw, unsigned short desth);
86 static void pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
87 unsigned short srcw, unsigned short srch,
88 unsigned short destw, unsigned short desth);
89 static void pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
90 unsigned short srcw, unsigned short srch,
91 unsigned short destw, unsigned short desth);
92 static void pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
93 unsigned short srcw, unsigned short srch,
94 unsigned short destw, unsigned short desth);
95 static void pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
96 unsigned short srcw, unsigned short srch,
97 unsigned short destw, unsigned short desth);
/* Gen5 module table: one entry per post-processing operation (NULL,
 * load/save, scaling, AVS, DNDI), each naming its kernel binary, the
 * binary's size and the module's initialize hook.
 * NOTE(review): entry braces and several fields are missing from this
 * extract. */
99 static struct pp_module pp_modules_gen5[] = {
101 "NULL module (for testing)",
104 sizeof(pp_null_gen5),
110 "NV12 Load & Save module",
112 pp_nv12_load_save_gen5,
113 sizeof(pp_nv12_load_save_gen5),
115 pp_nv12_load_save_initialize,
119 "NV12 Scaling module",
121 pp_nv12_scaling_gen5,
122 sizeof(pp_nv12_scaling_gen5),
124 pp_nv12_scaling_initialize,
131 sizeof(pp_nv12_avs_gen5),
133 pp_nv12_avs_initialize,
140 sizeof(pp_nv12_dndi_gen5),
142 pp_nv12_dndi_initialize,
/* Gen6 post-processing kernels (same set of operations as Gen5, built
 * for the newer ISA).  NOTE(review): closing "};" lines missing from
 * this extract. */
146 static uint32_t pp_null_gen6[][4] = {
147 #include "shaders/post_processing/null.g6b"
150 static uint32_t pp_nv12_load_save_gen6[][4] = {
151 #include "shaders/post_processing/nv12_load_save_nv12.g6b"
154 static uint32_t pp_nv12_scaling_gen6[][4] = {
155 #include "shaders/post_processing/nv12_scaling_nv12.g6b"
158 static uint32_t pp_nv12_avs_gen6[][4] = {
159 #include "shaders/post_processing/nv12_avs_nv12.g6b"
162 static uint32_t pp_nv12_dndi_gen6[][4] = {
163 #include "shaders/post_processing/nv12_dndi_nv12.g6b"
/* Gen6 module table; entries parallel pp_modules_gen5 one-for-one so the
 * two tables can be indexed interchangeably.
 * NOTE(review): entry braces and several fields are missing from this
 * extract. */
166 static struct pp_module pp_modules_gen6[] = {
168 "NULL module (for testing)",
171 sizeof(pp_null_gen6),
177 "NV12 Load & Save module",
179 pp_nv12_load_save_gen6,
180 sizeof(pp_nv12_load_save_gen6),
182 pp_nv12_load_save_initialize,
186 "NV12 Scaling module",
188 pp_nv12_scaling_gen6,
189 sizeof(pp_nv12_scaling_gen6),
191 pp_nv12_scaling_initialize,
198 sizeof(pp_nv12_avs_gen6),
200 pp_nv12_avs_initialize,
207 sizeof(pp_nv12_dndi_gen6),
209 pp_nv12_dndi_initialize,
/* Module count is taken from the Gen5 table; the Gen6 table mirrors it.
 * NOTE(review): pp_modules is presumably pointed at the table matching
 * the running device during driver init -- that code is not visible in
 * this extract. */
213 #define NUM_PP_MODULES ARRAY_ELEMS(pp_modules_gen5)
215 static struct pp_module *pp_modules = NULL;
/* CURBE constant data shared by the post-processing kernels.  The r*.*
 * comments name the GRF register/subregister each field lands in.
 * ironlake_pp_upload_constants asserts sizeof == 128 bytes (4 GRFs),
 * matching const_urb_entry_read_len = 4 in the interface descriptor.
 * NOTE(review): the grf1..grf4 sub-struct wrappers and several members
 * are missing from this extract; field-name typos ("picth", "Devided",
 * "Same") are preserved as-is since they are identifiers/comments from
 * the original. */
217 struct pp_static_parameter
221 float procamp_constant_c0;
223 /* Load and Same r1.1 */
224 unsigned int source_packed_y_offset:8;
225 unsigned int source_packed_u_offset:8;
226 unsigned int source_packed_v_offset:8;
230 /* Load and Save r1.2 */
232 unsigned int destination_packed_y_offset:8;
233 unsigned int destination_packed_u_offset:8;
234 unsigned int destination_packed_v_offset:8;
240 unsigned int destination_rgb_format:8;
241 unsigned int pad0:24;
246 float procamp_constant_c1;
249 float procamp_constant_c2;
252 unsigned int statistics_surface_picth:16; /* Devided by 2 */
253 unsigned int pad1:16;
258 unsigned int pad0:24;
259 unsigned int top_field_first:8;
262 /* AVS/Scaling r1.6 */
263 float normalized_video_y_scaling_step;
267 float procamp_constant_c5;
272 float procamp_constant_c3;
278 float wg_csc_constant_c4;
281 float wg_csc_constant_c8;
284 float procamp_constant_c4;
293 float wg_csc_constant_c9;
298 float wg_csc_constant_c0;
301 float scaling_step_ratio;
304 float normalized_alpha_y_scaling;
307 float wg_csc_constant_c4;
310 float wg_csc_constant_c1;
313 int horizontal_origin_offset:16;
314 int vertical_origin_offset:16;
319 unsigned int color_pixel;
322 float wg_csc_constant_c2;
326 float wg_csc_constant_c3;
331 float wg_csc_constant_c6;
333 /* ALL r4.1 MBZ ???*/
340 unsigned int pad1:15;
342 unsigned int pad2:16;
347 unsigned int motion_history_coefficient_m2:8;
348 unsigned int motion_history_coefficient_m1:8;
349 unsigned int pad0:16;
354 float wg_csc_constant_c7;
357 float wg_csc_constant_c10;
360 float source_video_frame_normalized_horizontal_origin;
366 float wg_csc_constant_c11;
/* Per-MEDIA_OBJECT inline data (grf 5-6).  Filled by the active module's
 * pp_set_block_parameter callback for every 16x8 output block, then
 * emitted into the batch by ironlake_pp_object_walker, which asserts
 * sizeof == 64 bytes.  NOTE(review): grf5/grf6 sub-struct wrappers and
 * some members are missing from this extract. */
370 struct pp_inline_parameter
374 int destination_block_horizontal_origin:16;
375 int destination_block_vertical_origin:16;
380 float source_surface_block_normalized_horizontal_origin;
384 unsigned int variance_surface_vertical_origin:16;
385 unsigned int pad0:16;
389 /* AVS/Scaling r5.2 */
390 float source_surface_block_normalized_vertical_origin;
393 float alpha_surface_block_normalized_horizontal_origin;
396 float alpha_surface_block_normalized_vertical_origin;
399 unsigned int alpha_mask_x:16;
400 unsigned int alpha_mask_y:8;
401 unsigned int block_count_x:8;
404 unsigned int block_horizontal_mask:16;
405 unsigned int block_vertical_mask:8;
406 unsigned int number_blocks:8;
408 /* AVS/Scaling r5.7 */
409 float normalized_video_x_scaling_step;
414 float video_step_delta;
417 unsigned int padx[7];
/* Module-global parameter blocks: static (CURBE) and inline (per-object)
 * data, mutated by the initialize/set_block_parameter hooks above. */
421 static struct pp_static_parameter pp_static_parameter;
422 static struct pp_inline_parameter pp_inline_parameter;
/* Translate an I915 tiling mode into SURFACE_STATE ss3 tiled_surface /
 * tile_walk fields.  NOTE(review): the switch header, the X/Y-tiling
 * case labels, break statements and closing braces are missing from
 * this extract. */
425 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
428 case I915_TILING_NONE:
429 ss->ss3.tiled_surface = 0;
430 ss->ss3.tile_walk = 0;
433 ss->ss3.tiled_surface = 1;
434 ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
437 ss->ss3.tiled_surface = 1;
438 ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
/* Same tiling translation as pp_set_surface_tiling, but for the
 * SURFACE_STATE2 layout used by sample_8x8 (fields live in ss2).
 * NOTE(review): switch header, case labels and breaks missing from this
 * extract. */
444 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
447 case I915_TILING_NONE:
448 ss->ss2.tiled_surface = 0;
449 ss->ss2.tile_walk = 0;
452 ss->ss2.tiled_surface = 1;
453 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
456 ss->ss2.tiled_surface = 1;
457 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
/* NOTE(review): only the signature is visible -- the body of this
 * function is missing from the extract. */
463 ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
/* Build the single interface descriptor for the currently selected
 * post-processing kernel: kernel start pointer, CURBE read length of 4
 * GRFs (grf 1-4 = pp_static_parameter), sampler-state and binding-table
 * pointers.  Each GPU-virtual-address field gets a relocation so the
 * kernel, sampler and binding-table buffers may move.
 * NOTE(review): the dri_bo_map call and closing brace are missing from
 * this extract. */
469 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
471 struct i965_interface_descriptor *desc;
473 int pp_index = pp_context->current_pp;
475 bo = pp_context->idrt.bo;
479 memset(desc, 0, sizeof(*desc));
480 desc->desc0.grf_reg_blocks = 10;
481 desc->desc0.kernel_start_pointer = pp_modules[pp_index].bo->offset >> 6; /* reloc */
482 desc->desc1.const_urb_entry_read_offset = 0;
483 desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
484 desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
485 desc->desc2.sampler_count = 0;
486 desc->desc3.binding_table_entry_count = 0;
487 desc->desc3.binding_table_pointer =
488 pp_context->binding_table.bo->offset >> 5; /*reloc */
/* Relocation for the kernel start pointer (low bits hold grf_reg_blocks). */
490 dri_bo_emit_reloc(bo,
491 I915_GEM_DOMAIN_INSTRUCTION, 0,
492 desc->desc0.grf_reg_blocks,
493 offsetof(struct i965_interface_descriptor, desc0),
494 pp_modules[pp_index].bo);
/* Relocation for the sampler state table pointer. */
496 dri_bo_emit_reloc(bo,
497 I915_GEM_DOMAIN_INSTRUCTION, 0,
498 desc->desc2.sampler_count << 2,
499 offsetof(struct i965_interface_descriptor, desc2),
500 pp_context->sampler_state_table.bo)
/* Relocation for the binding table pointer. */
502 dri_bo_emit_reloc(bo,
503 I915_GEM_DOMAIN_INSTRUCTION, 0,
504 desc->desc3.binding_table_entry_count,
505 offsetof(struct i965_interface_descriptor, desc3),
506 pp_context->binding_table.bo);
509 pp_context->idrt.num_interface_descriptors++;
/* Fill the binding table: for every surface slot that has a surface
 * state buffer, store its GPU offset at the matching table index and
 * emit a relocation for it.  Unused slots stay zero from the memset.
 * NOTE(review): the dri_bo_map/unmap calls, the reloc's delta argument
 * and the closing braces are missing from this extract. */
513 ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
515 unsigned int *binding_table;
516 dri_bo *bo = pp_context->binding_table.bo;
521 binding_table = bo->virtual;
522 memset(binding_table, 0, bo->size);
524 for (i = 0; i < MAX_PP_SURFACES; i++) {
525 if (pp_context->surfaces[i].ss_bo) {
526 assert(pp_context->surfaces[i].s_bo);
528 binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
529 dri_bo_emit_reloc(bo,
530 I915_GEM_DOMAIN_INSTRUCTION, 0,
532 i * sizeof(*binding_table),
533 pp_context->surfaces[i].ss_bo);
/* Program the VFE (Video Front End) state for generic media mode: thread
 * count and URB entry allocation from the context's URB layout, plus the
 * interface descriptor base (relocated).
 * NOTE(review): dri_bo_map/unmap and the reloc's read-domain/delta lines
 * are missing from this extract. */
542 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
544 struct i965_vfe_state *vfe_state;
547 bo = pp_context->vfe_state.bo;
550 vfe_state = bo->virtual;
551 memset(vfe_state, 0, sizeof(*vfe_state));
552 vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
553 vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
554 vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
555 vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
556 vfe_state->vfe1.children_present = 0;
557 vfe_state->vfe2.interface_descriptor_base =
558 pp_context->idrt.bo->offset >> 4; /* reloc */
559 dri_bo_emit_reloc(bo,
560 I915_GEM_DOMAIN_INSTRUCTION, 0,
562 offsetof(struct i965_vfe_state, vfe2),
563 pp_context->idrt.bo);
/* Copy the 128-byte pp_static_parameter block (grf 1-4 of CURBE data)
 * into the constant buffer BO.  The assert pins the struct layout to
 * exactly 4 GRF registers. */
568 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
570 unsigned char *constant_buffer;
572 assert(sizeof(pp_static_parameter) == 128);
573 dri_bo_map(pp_context->curbe.bo, 1);
574 assert(pp_context->curbe.bo->virtual);
575 constant_buffer = pp_context->curbe.bo->virtual;
576 memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
577 dri_bo_unmap(pp_context->curbe.bo);
/* Prepare all indirect state buffers (surface states, binding table,
 * interface descriptors, VFE state, constants) before the batch is
 * built by ironlake_pp_pipeline_setup. */
581 ironlake_pp_states_setup(VADriverContextP ctx)
583 struct i965_driver_data *i965 = i965_driver_data(ctx);
584 struct i965_post_processing_context *pp_context = i965->pp_context;
586 ironlake_pp_surface_state(pp_context);
587 ironlake_pp_binding_table(pp_context);
588 ironlake_pp_interface_descriptor_table(pp_context);
589 ironlake_pp_vfe_state(pp_context);
590 ironlake_pp_upload_constants(pp_context);
/* Emit PIPELINE_SELECT to switch the GPU to the media pipeline.
 * NOTE(review): the BEGIN/ADVANCE batch bracketing is missing from this
 * extract. */
594 ironlake_pp_pipeline_select(VADriverContextP ctx)
597 OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
/* Emit URB_FENCE partitioning the URB between the VFE section (up to
 * urb.cs_start) and the constant section (up to urb.size). */
604 ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
606 unsigned int vfe_fence, cs_fence;
608 vfe_fence = pp_context->urb.cs_start;
609 cs_fence = pp_context->urb.size;
/* NOTE(review): original line numbering restarts the values below; kept as-is. */
610 OUT_BATCH(ctx, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
613 (vfe_fence << UF2_VFE_FENCE_SHIFT) | /* VFE_SIZE */
614 (cs_fence << UF2_CS_FENCE_SHIFT)); /* CS_SIZE */
/* Emit STATE_BASE_ADDRESS with every base address zero (MODIFY bit set),
 * so all state pointers in the batch are absolute GPU addresses. */
619 ironlake_pp_state_base_address(VADriverContextP ctx)
622 OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
623 OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
624 OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
625 OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
626 OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
627 OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
628 OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
629 OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
/* Emit MEDIA_STATE_POINTERS pointing the pipeline at the VFE state BO
 * (relocated). */
634 ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
637 OUT_BATCH(ctx, CMD_MEDIA_STATE_POINTERS | 1);
639 OUT_RELOC(ctx, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
/* Emit CS_URB_STATE describing the constant-buffer URB entries
 * (allocation size and count from the context's URB layout). */
644 ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
647 OUT_BATCH(ctx, CMD_CS_URB_STATE | 0);
649 ((pp_context->urb.size_cs_entry - 1) << 4) | /* URB Entry Allocation Size */
650 (pp_context->urb.num_cs_entries << 0)); /* Number of URB Entries */
/* Emit CONSTANT_BUFFER binding the CURBE BO (filled by
 * ironlake_pp_upload_constants); the reloc delta carries the buffer
 * length field. */
655 ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
658 OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
659 OUT_RELOC(ctx, pp_context->curbe.bo,
660 I915_GEM_DOMAIN_INSTRUCTION, 0,
661 pp_context->urb.size_cs_entry - 1);
/* Walk the output in blocks: for each (x, y) step the active module's
 * pp_set_block_parameter callback updates pp_inline_parameter; when it
 * returns 0 a MEDIA_OBJECT command is emitted carrying the 64-byte
 * inline data (grf 5-6).  NOTE(review): some OUT_BATCH lines and the
 * closing braces are missing from this extract. */
666 ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
668 int x, x_steps, y, y_steps;
670 x_steps = pp_context->pp_x_steps(&pp_context->private_context);
671 y_steps = pp_context->pp_y_steps(&pp_context->private_context);
673 for (y = 0; y < y_steps; y++) {
674 for (x = 0; x < x_steps; x++) {
675 if (!pp_context->pp_set_block_parameter(&pp_context->private_context, x, y)) {
676 BEGIN_BATCH(ctx, 20);
677 OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 18);
679 OUT_BATCH(ctx, 0); /* no indirect data */
682 /* inline data grf 5-6 */
683 assert(sizeof(pp_inline_parameter) == 64);
684 intel_batchbuffer_data(ctx, &pp_inline_parameter, sizeof(pp_inline_parameter));
/* Build the complete post-processing batch for Ironlake: flush, select
 * the media pipeline, program base addresses / state pointers / URB /
 * constants, then emit one MEDIA_OBJECT per block via the walker.
 * The whole sequence is bracketed atomically in the batchbuffer. */
693 ironlake_pp_pipeline_setup(VADriverContextP ctx)
695 struct i965_driver_data *i965 = i965_driver_data(ctx);
696 struct i965_post_processing_context *pp_context = i965->pp_context;
698 intel_batchbuffer_start_atomic(ctx, 0x1000);
699 intel_batchbuffer_emit_mi_flush(ctx);
700 ironlake_pp_pipeline_select(ctx);
701 ironlake_pp_state_base_address(ctx);
702 ironlake_pp_state_pointers(ctx, pp_context);
703 ironlake_pp_urb_layout(ctx, pp_context);
704 ironlake_pp_cs_urb_layout(ctx, pp_context);
705 ironlake_pp_constant_buffer(ctx, pp_context);
706 ironlake_pp_object_walker(ctx, pp_context);
707 intel_batchbuffer_end_atomic(ctx);
/* Step/parameter callbacks for the NULL (pass-through) module.
 * NOTE(review): only the signatures are visible; the return statements
 * are missing from this extract. */
711 pp_null_x_steps(void *private_context)
717 pp_null_y_steps(void *private_context)
723 pp_null_set_block_parameter(void *private_context, int x, int y)
/* NULL module initializer: no processing -- the output buffer aliases
 * the input surface's BO (extra reference taken) and the output
 * dimensions copy the input's, then the no-op step callbacks are
 * installed. */
729 pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
730 unsigned short srcw, unsigned short srch,
731 unsigned short destw, unsigned short desth)
733 struct i965_driver_data *i965 = i965_driver_data(ctx);
734 struct i965_post_processing_context *pp_context = i965->pp_context;
735 struct object_surface *obj_surface;
738 obj_surface = SURFACE(surface);
739 dri_bo_unreference(obj_surface->pp_out_bo);
740 obj_surface->pp_out_bo = obj_surface->bo;
741 dri_bo_reference(obj_surface->pp_out_bo);
742 assert(obj_surface->pp_out_bo);
743 obj_surface->pp_out_width = obj_surface->width;
744 obj_surface->pp_out_height = obj_surface->height;
745 obj_surface->orig_pp_out_width = obj_surface->orig_width;
746 obj_surface->orig_pp_out_height = obj_surface->orig_height;
748 /* private function & data */
749 pp_context->pp_x_steps = pp_null_x_steps;
750 pp_context->pp_y_steps = pp_null_y_steps;
751 pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
/* Load/save module callbacks: the walk is 1 column (block_count_x spans
 * the row) by dest_h/8 rows; each block is 16x8 pixels with full
 * horizontal/vertical masks.  NOTE(review): the x_steps return statement
 * and the set_block_parameter return are missing from this extract. */
755 pp_load_save_x_steps(void *private_context)
761 pp_load_save_y_steps(void *private_context)
763 struct pp_load_save_context *pp_load_save_context = private_context;
765 return pp_load_save_context->dest_h / 8;
769 pp_load_save_set_block_parameter(void *private_context, int x, int y)
771 pp_inline_parameter.grf5.block_vertical_mask = 0xff;
772 pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
773 pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
774 pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
/* Load/save module initializer: allocates an intermediate output BO the
 * same size as the source, then builds four SURFACE_STATEs -- source
 * Y (binding index 1, R8) and UV (index 2, R8G8, offset w*h into the
 * NV12 buffer), and destination Y (index 7) and UV (index 8) over the
 * new output BO -- each with a relocation on its base address.  Widths
 * are programmed as dwords (orig_w / 4 - 1) since the kernel moves
 * packed data.  Finally installs the load/save step callbacks and sets
 * the block counts for a 1 x N walk.
 * NOTE(review): many lines (index assignments, alloc arguments,
 * dri_bo_unmap calls, closing braces) are missing from this extract. */
780 pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
781 unsigned short srcw, unsigned short srch,
782 unsigned short destw, unsigned short desth)
784 struct i965_driver_data *i965 = i965_driver_data(ctx);
785 struct i965_post_processing_context *pp_context = i965->pp_context;
786 struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
787 struct object_surface *obj_surface;
788 struct i965_surface_state *ss;
792 unsigned int tiling, swizzle;
795 obj_surface = SURFACE(surface);
796 orig_w = obj_surface->orig_width;
797 orig_h = obj_surface->orig_height;
798 w = obj_surface->width;
799 h = obj_surface->height;
801 dri_bo_unreference(obj_surface->pp_out_bo);
802 obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
803 "intermediate surface",
806 assert(obj_surface->pp_out_bo);
807 obj_surface->pp_out_width = obj_surface->width;
808 obj_surface->pp_out_height = obj_surface->height;
809 obj_surface->orig_pp_out_width = obj_surface->orig_width;
810 obj_surface->orig_pp_out_height = obj_surface->orig_height;
812 /* source Y surface index 1 */
813 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
816 pp_context->surfaces[index].s_bo = obj_surface->bo;
817 dri_bo_reference(pp_context->surfaces[index].s_bo);
818 bo = dri_bo_alloc(i965->intel.bufmgr,
820 sizeof(struct i965_surface_state),
823 pp_context->surfaces[index].ss_bo = bo;
824 dri_bo_map(bo, True);
827 memset(ss, 0, sizeof(*ss));
828 ss->ss0.surface_type = I965_SURFACE_2D;
829 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
830 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
831 ss->ss2.width = orig_w / 4 - 1;
832 ss->ss2.height = orig_h - 1;
833 ss->ss3.pitch = w - 1;
834 pp_set_surface_tiling(ss, tiling);
835 dri_bo_emit_reloc(bo,
836 I915_GEM_DOMAIN_RENDER,
839 offsetof(struct i965_surface_state, ss1),
840 pp_context->surfaces[index].s_bo);
843 /* source UV surface index 2 */
845 pp_context->surfaces[index].s_bo = obj_surface->bo;
846 dri_bo_reference(pp_context->surfaces[index].s_bo);
847 bo = dri_bo_alloc(i965->intel.bufmgr,
849 sizeof(struct i965_surface_state),
852 pp_context->surfaces[index].ss_bo = bo;
853 dri_bo_map(bo, True);
856 memset(ss, 0, sizeof(*ss));
857 ss->ss0.surface_type = I965_SURFACE_2D;
858 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
859 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
860 ss->ss2.width = orig_w / 4 - 1;
861 ss->ss2.height = orig_h / 2 - 1;
862 ss->ss3.pitch = w - 1;
863 pp_set_surface_tiling(ss, tiling);
864 dri_bo_emit_reloc(bo,
865 I915_GEM_DOMAIN_RENDER,
868 offsetof(struct i965_surface_state, ss1),
869 pp_context->surfaces[index].s_bo);
872 /* destination Y surface index 7 */
874 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
875 dri_bo_reference(pp_context->surfaces[index].s_bo);
876 bo = dri_bo_alloc(i965->intel.bufmgr,
878 sizeof(struct i965_surface_state),
881 pp_context->surfaces[index].ss_bo = bo;
882 dri_bo_map(bo, True);
885 memset(ss, 0, sizeof(*ss));
886 ss->ss0.surface_type = I965_SURFACE_2D;
887 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
888 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
889 ss->ss2.width = orig_w / 4 - 1;
890 ss->ss2.height = orig_h - 1;
891 ss->ss3.pitch = w - 1;
892 dri_bo_emit_reloc(bo,
893 I915_GEM_DOMAIN_RENDER,
894 I915_GEM_DOMAIN_RENDER,
896 offsetof(struct i965_surface_state, ss1),
897 pp_context->surfaces[index].s_bo);
900 /* destination UV surface index 8 */
902 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
903 dri_bo_reference(pp_context->surfaces[index].s_bo);
904 bo = dri_bo_alloc(i965->intel.bufmgr,
906 sizeof(struct i965_surface_state),
909 pp_context->surfaces[index].ss_bo = bo;
910 dri_bo_map(bo, True);
913 memset(ss, 0, sizeof(*ss));
914 ss->ss0.surface_type = I965_SURFACE_2D;
915 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
916 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
917 ss->ss2.width = orig_w / 4 - 1;
918 ss->ss2.height = orig_h / 2 - 1;
919 ss->ss3.pitch = w - 1;
920 dri_bo_emit_reloc(bo,
921 I915_GEM_DOMAIN_RENDER,
922 I915_GEM_DOMAIN_RENDER,
924 offsetof(struct i965_surface_state, ss1),
925 pp_context->surfaces[index].s_bo);
928 /* private function & data */
929 pp_context->pp_x_steps = pp_load_save_x_steps;
930 pp_context->pp_y_steps = pp_load_save_y_steps;
931 pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
932 pp_load_save_context->dest_h = h;
933 pp_load_save_context->dest_w = w;
935 pp_inline_parameter.grf5.block_count_x = w / 16; /* 1 x N */
936 pp_inline_parameter.grf5.number_blocks = w / 16;
/* Scaling module callbacks: walk dest_h/8 rows of 16x8 blocks; each
 * block's normalized source origin is the scaling step times the block's
 * pixel origin.  NOTE(review): the x_steps return and the
 * set_block_parameter return are missing from this extract. */
940 pp_scaling_x_steps(void *private_context)
946 pp_scaling_y_steps(void *private_context)
948 struct pp_scaling_context *pp_scaling_context = private_context;
950 return pp_scaling_context->dest_h / 8;
954 pp_scaling_set_block_parameter(void *private_context, int x, int y)
956 float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
957 float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
959 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16;
960 pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
961 pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
962 pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
/* Scaling module initializer: allocates a 16-aligned NV12 output buffer
 * of destw x desth, builds source Y/UV sampled surfaces (indices 1/2,
 * full-pixel widths since they are read through samplers) and
 * destination Y/UV data-port surfaces (indices 7/8, dword widths),
 * programs bilinear clamped samplers for Y and UV, installs the scaling
 * callbacks and seeds the normalized x/y scaling steps and block counts.
 * NOTE(review): index assignments, alloc arguments, dri_bo_unmap calls
 * and closing braces are missing from this extract. */
968 pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
969 unsigned short srcw, unsigned short srch,
970 unsigned short destw, unsigned short desth)
972 struct i965_driver_data *i965 = i965_driver_data(ctx);
973 struct i965_post_processing_context *pp_context = i965->pp_context;
974 struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
975 struct object_surface *obj_surface;
976 struct i965_sampler_state *sampler_state;
977 struct i965_surface_state *ss;
982 int pp_out_w, pp_out_h;
983 int orig_pp_out_w, orig_pp_out_h;
984 unsigned int tiling, swizzle;
987 obj_surface = SURFACE(surface);
988 orig_w = obj_surface->orig_width;
989 orig_h = obj_surface->orig_height;
990 w = obj_surface->width;
991 h = obj_surface->height;
993 orig_pp_out_w = destw;
994 orig_pp_out_h = desth;
995 pp_out_w = ALIGN(orig_pp_out_w, 16);
996 pp_out_h = ALIGN(orig_pp_out_h, 16);
997 dri_bo_unreference(obj_surface->pp_out_bo);
998 obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
999 "intermediate surface",
1000 SIZE_YUV420(pp_out_w, pp_out_h),
1002 assert(obj_surface->pp_out_bo);
1003 obj_surface->orig_pp_out_width = orig_pp_out_w;
1004 obj_surface->orig_pp_out_height = orig_pp_out_h;
1005 obj_surface->pp_out_width = pp_out_w;
1006 obj_surface->pp_out_height = pp_out_h;
1008 /* source Y surface index 1 */
1009 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1012 pp_context->surfaces[index].s_bo = obj_surface->bo;
1013 dri_bo_reference(pp_context->surfaces[index].s_bo);
1014 bo = dri_bo_alloc(i965->intel.bufmgr,
1016 sizeof(struct i965_surface_state),
1019 pp_context->surfaces[index].ss_bo = bo;
1020 dri_bo_map(bo, True);
1021 assert(bo->virtual);
1023 memset(ss, 0, sizeof(*ss));
1024 ss->ss0.surface_type = I965_SURFACE_2D;
1025 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1026 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1027 ss->ss2.width = orig_w - 1;
1028 ss->ss2.height = orig_h - 1;
1029 ss->ss3.pitch = w - 1;
1030 pp_set_surface_tiling(ss, tiling);
1031 dri_bo_emit_reloc(bo,
1032 I915_GEM_DOMAIN_RENDER,
1035 offsetof(struct i965_surface_state, ss1),
1036 pp_context->surfaces[index].s_bo);
1039 /* source UV surface index 2 */
1041 pp_context->surfaces[index].s_bo = obj_surface->bo;
1042 dri_bo_reference(pp_context->surfaces[index].s_bo);
1043 bo = dri_bo_alloc(i965->intel.bufmgr,
1045 sizeof(struct i965_surface_state),
1048 pp_context->surfaces[index].ss_bo = bo;
1049 dri_bo_map(bo, True);
1050 assert(bo->virtual);
1052 memset(ss, 0, sizeof(*ss));
1053 ss->ss0.surface_type = I965_SURFACE_2D;
1054 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1055 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1056 ss->ss2.width = orig_w / 2 - 1;
1057 ss->ss2.height = orig_h / 2 - 1;
1058 ss->ss3.pitch = w - 1;
1059 pp_set_surface_tiling(ss, tiling);
1060 dri_bo_emit_reloc(bo,
1061 I915_GEM_DOMAIN_RENDER,
1064 offsetof(struct i965_surface_state, ss1),
1065 pp_context->surfaces[index].s_bo);
1068 /* destination Y surface index 7 */
1070 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1071 dri_bo_reference(pp_context->surfaces[index].s_bo);
1072 bo = dri_bo_alloc(i965->intel.bufmgr,
1074 sizeof(struct i965_surface_state),
1077 pp_context->surfaces[index].ss_bo = bo;
1078 dri_bo_map(bo, True);
1079 assert(bo->virtual);
1081 memset(ss, 0, sizeof(*ss));
1082 ss->ss0.surface_type = I965_SURFACE_2D;
1083 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1084 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1085 ss->ss2.width = pp_out_w / 4 - 1;
1086 ss->ss2.height = pp_out_h - 1;
1087 ss->ss3.pitch = pp_out_w - 1;
1088 dri_bo_emit_reloc(bo,
1089 I915_GEM_DOMAIN_RENDER,
1090 I915_GEM_DOMAIN_RENDER,
1092 offsetof(struct i965_surface_state, ss1),
1093 pp_context->surfaces[index].s_bo);
1096 /* destination UV surface index 8 */
1098 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1099 dri_bo_reference(pp_context->surfaces[index].s_bo);
1100 bo = dri_bo_alloc(i965->intel.bufmgr,
1102 sizeof(struct i965_surface_state),
1105 pp_context->surfaces[index].ss_bo = bo;
1106 dri_bo_map(bo, True);
1107 assert(bo->virtual);
1109 memset(ss, 0, sizeof(*ss));
1110 ss->ss0.surface_type = I965_SURFACE_2D;
1111 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1112 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
1113 ss->ss2.width = pp_out_w / 4 - 1;
1114 ss->ss2.height = pp_out_h / 2 - 1;
1115 ss->ss3.pitch = pp_out_w - 1;
1116 dri_bo_emit_reloc(bo,
1117 I915_GEM_DOMAIN_RENDER,
1118 I915_GEM_DOMAIN_RENDER,
1119 pp_out_w * pp_out_h,
1120 offsetof(struct i965_surface_state, ss1),
1121 pp_context->surfaces[index].s_bo);
1125 dri_bo_map(pp_context->sampler_state_table.bo, True);
1126 assert(pp_context->sampler_state_table.bo->virtual);
1127 sampler_state = pp_context->sampler_state_table.bo->virtual;
1129 /* SIMD16 Y index 1 */
1130 sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1131 sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1132 sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1133 sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1134 sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1136 /* SIMD16 UV index 2 */
1137 sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1138 sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1139 sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1140 sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1141 sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1143 dri_bo_unmap(pp_context->sampler_state_table.bo);
1145 /* private function & data */
1146 pp_context->pp_x_steps = pp_scaling_x_steps;
1147 pp_context->pp_y_steps = pp_scaling_y_steps;
1148 pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1150 pp_scaling_context->dest_w = pp_out_w;
1151 pp_scaling_context->dest_h = pp_out_h;
1153 pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
1154 pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
1155 pp_inline_parameter.grf5.block_count_x = pp_out_w / 16; /* 1 x N */
1156 pp_inline_parameter.grf5.number_blocks = pp_out_w / 16;
1157 pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1158 pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
/* AVS (adaptive video scaling) callbacks.  pp_avs_set_block_parameter
 * implements non-linear anamorphic scaling: when the aspect-preserving
 * width (tmp_w) covers the destination, scaling is uniform; otherwise
 * the left/right edge column groups (nls_left/nls_right) are scaled with
 * a per-block step delta (quadratic ramp, coefficients a and b) while
 * the center is scaled linearly, accumulating the normalized source
 * origin block by block.  Each call also sets the vertical origin and
 * the destination block origin, then presumably returns 0 so the walker
 * emits the MEDIA_OBJECT (return not visible).
 * NOTE(review): many lines (y_steps body, several if/else headers,
 * nls_left assignment, closing braces) are missing from this extract --
 * the control flow below cannot be fully reconstructed from what is
 * shown. */
1162 pp_avs_x_steps(void *private_context)
1164 struct pp_avs_context *pp_avs_context = private_context;
1166 return pp_avs_context->dest_w / 16;
1170 pp_avs_y_steps(void *private_context)
1176 pp_avs_set_block_parameter(void *private_context, int x, int y)
1178 struct pp_avs_context *pp_avs_context = private_context;
1179 float src_x_steping, src_y_steping, video_step_delta;
1180 int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1182 if (tmp_w >= pp_avs_context->dest_w) {
1183 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1184 pp_inline_parameter.grf6.video_step_delta = 0;
1187 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2;
1189 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1190 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1191 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1192 16 * 15 * video_step_delta / 2;
1195 int n0, n1, n2, nls_left, nls_right;
1196 int factor_a = 5, factor_b = 4;
1199 n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1200 n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1201 n2 = tmp_w / (16 * factor_a);
1203 nls_right = n1 + n2;
1204 f = (float) n2 * 16 / tmp_w;
1207 pp_inline_parameter.grf6.video_step_delta = 0.0;
1210 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1211 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1213 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1214 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1215 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1216 16 * 15 * video_step_delta / 2;
1220 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1221 float a = f / (nls_left * 16 * factor_b);
1222 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1224 pp_inline_parameter.grf6.video_step_delta = b;
1227 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1228 pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
1230 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1231 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1232 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1233 16 * 15 * video_step_delta / 2;
1234 pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
1236 } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1237 /* scale the center linearly */
1238 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1239 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1240 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1241 16 * 15 * video_step_delta / 2;
1242 pp_inline_parameter.grf6.video_step_delta = 0.0;
1243 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1245 float a = f / (nls_right * 16 * factor_b);
1246 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1248 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1249 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1250 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1251 16 * 15 * video_step_delta / 2;
1252 pp_inline_parameter.grf6.video_step_delta = -b;
1254 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1255 pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16 - 1) * b;
1257 pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
1262 src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
1263 pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
1264 pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1265 pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
/*
 * Set up all GPU state for the NV12 AVS (Adaptive Video Scaler) post-
 * processing pass: SAMPLE_8x8 surface states for the source Y and UV
 * planes, an intermediate 16-aligned NV12 destination surface, AVS/IEF
 * sampler-8x8 state for both channels, and the static/inline kernel
 * parameters consumed by the AVS shader.
 *
 * ctx         - VA driver context (provides the i965 driver data / bufmgr)
 * surface     - VA surface to scale from
 * input       - source selector; NOTE(review): the branch that uses it is
 *               not visible in this excerpt — presumably non-zero means
 *               "read the surface's pp_out intermediate" (confirm caller)
 * srcw/srch   - source dimensions (not referenced in the visible code)
 * destw/desth - requested output size; rounded up to 16 below
 */
1271 pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1272 unsigned short srcw, unsigned short srch,
1273 unsigned short destw, unsigned short desth)
1275 struct i965_driver_data *i965 = i965_driver_data(ctx);
1276 struct i965_post_processing_context *pp_context = i965->pp_context;
1277 struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1278 struct object_surface *obj_surface;
1279 struct i965_surface_state *ss;
1280 struct i965_sampler_8x8 *sampler_8x8;
1281 struct i965_sampler_8x8_state *sampler_8x8_state;
1282 struct i965_surface_state2 *ss_8x8;
1283 dri_bo *bo, *src_bo;
1287 int pp_out_w, pp_out_h;
1288 int orig_pp_out_w, orig_pp_out_h;
1289 unsigned int tiling, swizzle;
1292 obj_surface = SURFACE(surface);
/*
 * Pick the source geometry and buffer object.  NOTE(review): the condition
 * choosing between the pp_out intermediate and the original surface is not
 * visible here — presumably keyed off `input`; verify against the full file.
 */
1295 orig_w = obj_surface->orig_pp_out_width;
1296 orig_h = obj_surface->orig_pp_out_height;
1297 w = obj_surface->pp_out_width;
1298 h = obj_surface->pp_out_height;
1299 src_bo = obj_surface->pp_out_bo;
1301 orig_w = obj_surface->orig_width;
1302 orig_h = obj_surface->orig_height;
1303 w = obj_surface->width;
1304 h = obj_surface->height;
1305 src_bo = obj_surface->bo;
1309 dri_bo_get_tiling(src_bo, &tiling, &swizzle);
1311 /* source Y surface index 1 */
1313 pp_context->surfaces[index].s_bo = src_bo;
1314 dri_bo_reference(pp_context->surfaces[index].s_bo);
1315 bo = dri_bo_alloc(i965->intel.bufmgr,
1316 "Y surface state for sample_8x8",
1317 sizeof(struct i965_surface_state2),
1320 pp_context->surfaces[index].ss_bo = bo;
1321 dri_bo_map(bo, True);
1322 assert(bo->virtual);
1323 ss_8x8 = bo->virtual;
1324 memset(ss_8x8, 0, sizeof(*ss_8x8));
1325 ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1326 ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
/* SURFACE_STATE2 width/height/pitch fields are minus-one encoded. */
1327 ss_8x8->ss1.width = orig_w - 1;
1328 ss_8x8->ss1.height = orig_h - 1;
1329 ss_8x8->ss2.half_pitch_for_chroma = 0;
1330 ss_8x8->ss2.pitch = w - 1;
1331 ss_8x8->ss2.interleave_chroma = 0;
1332 ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM;
1333 ss_8x8->ss3.x_offset_for_cb = 0;
1334 ss_8x8->ss3.y_offset_for_cb = 0;
1335 pp_set_surface2_tiling(ss_8x8, tiling);
/* Relocation so ss0.surface_base_address tracks the bo's final GTT offset. */
1336 dri_bo_emit_reloc(bo,
1337 I915_GEM_DOMAIN_RENDER,
1340 offsetof(struct i965_surface_state2, ss0),
1341 pp_context->surfaces[index].s_bo);
1344 /* source UV surface index 2 */
1346 pp_context->surfaces[index].s_bo = src_bo;
1347 dri_bo_reference(pp_context->surfaces[index].s_bo);
1348 bo = dri_bo_alloc(i965->intel.bufmgr,
1349 "UV surface state for sample_8x8",
1350 sizeof(struct i965_surface_state2),
1353 pp_context->surfaces[index].ss_bo = bo;
1354 dri_bo_map(bo, True);
1355 assert(bo->virtual);
1356 ss_8x8 = bo->virtual;
1357 memset(ss_8x8, 0, sizeof(*ss_8x8));
/* UV plane of NV12 starts right after the w*h Y plane. */
1358 ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + w * h;
1359 ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1360 ss_8x8->ss1.width = orig_w - 1;
1361 ss_8x8->ss1.height = orig_h - 1;
1362 ss_8x8->ss2.half_pitch_for_chroma = 0;
1363 ss_8x8->ss2.pitch = w - 1;
1364 ss_8x8->ss2.interleave_chroma = 1;
1365 ss_8x8->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1366 ss_8x8->ss3.x_offset_for_cb = 0;
1367 ss_8x8->ss3.y_offset_for_cb = 0;
1368 pp_set_surface2_tiling(ss_8x8, tiling);
1369 dri_bo_emit_reloc(bo,
1370 I915_GEM_DOMAIN_RENDER,
1373 offsetof(struct i965_surface_state2, ss0),
1374 pp_context->surfaces[index].s_bo);
/* Allocate/replace the intermediate NV12 output surface, 16-aligned. */
1377 orig_pp_out_w = destw;
1378 orig_pp_out_h = desth;
1379 pp_out_w = ALIGN(orig_pp_out_w, 16);
1380 pp_out_h = ALIGN(orig_pp_out_h, 16);
1381 dri_bo_unreference(obj_surface->pp_out_bo);
1382 obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1383 "intermediate surface",
1384 SIZE_YUV420(pp_out_w, pp_out_h),
1386 assert(obj_surface->pp_out_bo);
1387 obj_surface->orig_pp_out_width = orig_pp_out_w;
1388 obj_surface->orig_pp_out_height = orig_pp_out_h;
1389 obj_surface->pp_out_width = pp_out_w;
1390 obj_surface->pp_out_height = pp_out_h;
1392 /* destination Y surface index 7 */
1394 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1395 dri_bo_reference(pp_context->surfaces[index].s_bo);
1396 bo = dri_bo_alloc(i965->intel.bufmgr,
1398 sizeof(struct i965_surface_state),
1401 pp_context->surfaces[index].ss_bo = bo;
1402 dri_bo_map(bo, True);
1403 assert(bo->virtual);
1405 memset(ss, 0, sizeof(*ss));
1406 ss->ss0.surface_type = I965_SURFACE_2D;
1407 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1408 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
/* Written as R8 with 4 bytes per element, hence width/4. */
1409 ss->ss2.width = pp_out_w / 4 - 1;
1410 ss->ss2.height = pp_out_h - 1;
1411 ss->ss3.pitch = pp_out_w - 1;
1412 dri_bo_emit_reloc(bo,
1413 I915_GEM_DOMAIN_RENDER,
1414 I915_GEM_DOMAIN_RENDER,
1416 offsetof(struct i965_surface_state, ss1),
1417 pp_context->surfaces[index].s_bo);
1420 /* destination UV surface index 8 */
1422 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1423 dri_bo_reference(pp_context->surfaces[index].s_bo);
1424 bo = dri_bo_alloc(i965->intel.bufmgr,
1426 sizeof(struct i965_surface_state),
1429 pp_context->surfaces[index].ss_bo = bo;
1430 dri_bo_map(bo, True);
1431 assert(bo->virtual);
1433 memset(ss, 0, sizeof(*ss));
1434 ss->ss0.surface_type = I965_SURFACE_2D;
1435 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1436 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
1437 ss->ss2.width = pp_out_w / 4 - 1;
1438 ss->ss2.height = pp_out_h / 2 - 1;
1439 ss->ss3.pitch = pp_out_w - 1;
1440 dri_bo_emit_reloc(bo,
1441 I915_GEM_DOMAIN_RENDER,
1442 I915_GEM_DOMAIN_RENDER,
1443 pp_out_w * pp_out_h,
1444 offsetof(struct i965_surface_state, ss1),
1445 pp_context->surfaces[index].s_bo);
1448 /* sampler 8x8 state */
1449 dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1450 assert(pp_context->sampler_state_table.bo_8x8->virtual);
/* Hardware expects exactly 138 dwords of sampler-8x8 state. */
1451 assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1452 sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1453 memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1454 sampler_8x8_state->dw136.default_sharpness_level = 0;
1455 sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1456 sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1457 sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1458 dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
/* The AVS/IEF sampler entries live in the regular sampler state table. */
1461 dri_bo_map(pp_context->sampler_state_table.bo, True);
1462 assert(pp_context->sampler_state_table.bo->virtual);
1463 assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1464 sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1466 /* sample_8x8 Y index 1 */
1468 memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1469 sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1470 sampler_8x8[index].dw0.ief_bypass = 0;
1471 sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1472 sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
/* dw1 holds a 32-byte-aligned pointer to the sampler-8x8 state above. */
1473 sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1474 sampler_8x8[index].dw2.global_noise_estimation = 22;
1475 sampler_8x8[index].dw2.strong_edge_threshold = 8;
1476 sampler_8x8[index].dw2.weak_edge_threshold = 1;
1477 sampler_8x8[index].dw3.strong_edge_weight = 7;
1478 sampler_8x8[index].dw3.regular_weight = 2;
1479 sampler_8x8[index].dw3.non_edge_weight = 0;
1480 sampler_8x8[index].dw3.gain_factor = 40;
1481 sampler_8x8[index].dw4.steepness_boost = 0;
1482 sampler_8x8[index].dw4.steepness_threshold = 0;
1483 sampler_8x8[index].dw4.mr_boost = 0;
1484 sampler_8x8[index].dw4.mr_threshold = 5;
/* IEF piece-wise-linear curve: points, biases and slopes (tuned values). */
1485 sampler_8x8[index].dw5.pwl1_point_1 = 4;
1486 sampler_8x8[index].dw5.pwl1_point_2 = 12;
1487 sampler_8x8[index].dw5.pwl1_point_3 = 16;
1488 sampler_8x8[index].dw5.pwl1_point_4 = 26;
1489 sampler_8x8[index].dw6.pwl1_point_5 = 40;
1490 sampler_8x8[index].dw6.pwl1_point_6 = 160;
1491 sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1492 sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1493 sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1494 sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1495 sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1496 sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1497 sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1498 sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1499 sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1500 sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1501 sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1502 sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1503 sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1504 sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1505 sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1506 sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1507 sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1508 sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1509 sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1510 sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1511 sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1512 sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1513 sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1514 sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1515 sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1516 sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1517 sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1518 sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1519 sampler_8x8[index].dw13.limiter_boost = 0;
1520 sampler_8x8[index].dw13.minimum_limiter = 10;
1521 sampler_8x8[index].dw13.maximum_limiter = 11;
1522 sampler_8x8[index].dw14.clip_limiter = 130;
/* Patch dw1's state pointer when the bo_8x8 buffer is finally placed. */
1523 dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1524 I915_GEM_DOMAIN_RENDER,
1527 sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1528 pp_context->sampler_state_table.bo_8x8);
/* Separate sampler-8x8 state for UV: adaptive filtering disabled. */
1530 dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
1531 assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
1532 assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1533 sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
1534 memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1535 sampler_8x8_state->dw136.default_sharpness_level = 0;
1536 sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
1537 sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1538 sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1539 dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
1541 /* sample_8x8 UV index 2 */
1543 memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
/* UV uses nearest filtering; IEF settings mirror the Y channel. */
1544 sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
1545 sampler_8x8[index].dw0.ief_bypass = 0;
1546 sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1547 sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1548 sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
1549 sampler_8x8[index].dw2.global_noise_estimation = 22;
1550 sampler_8x8[index].dw2.strong_edge_threshold = 8;
1551 sampler_8x8[index].dw2.weak_edge_threshold = 1;
1552 sampler_8x8[index].dw3.strong_edge_weight = 7;
1553 sampler_8x8[index].dw3.regular_weight = 2;
1554 sampler_8x8[index].dw3.non_edge_weight = 0;
1555 sampler_8x8[index].dw3.gain_factor = 40;
1556 sampler_8x8[index].dw4.steepness_boost = 0;
1557 sampler_8x8[index].dw4.steepness_threshold = 0;
1558 sampler_8x8[index].dw4.mr_boost = 0;
1559 sampler_8x8[index].dw4.mr_threshold = 5;
1560 sampler_8x8[index].dw5.pwl1_point_1 = 4;
1561 sampler_8x8[index].dw5.pwl1_point_2 = 12;
1562 sampler_8x8[index].dw5.pwl1_point_3 = 16;
1563 sampler_8x8[index].dw5.pwl1_point_4 = 26;
1564 sampler_8x8[index].dw6.pwl1_point_5 = 40;
1565 sampler_8x8[index].dw6.pwl1_point_6 = 160;
1566 sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1567 sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1568 sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1569 sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1570 sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1571 sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1572 sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1573 sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1574 sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1575 sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1576 sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1577 sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1578 sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1579 sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1580 sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1581 sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1582 sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1583 sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1584 sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1585 sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1586 sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1587 sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1588 sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1589 sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1590 sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1591 sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1592 sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1593 sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1594 sampler_8x8[index].dw13.limiter_boost = 0;
1595 sampler_8x8[index].dw13.minimum_limiter = 10;
1596 sampler_8x8[index].dw13.maximum_limiter = 11;
1597 sampler_8x8[index].dw14.clip_limiter = 130;
1598 dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1599 I915_GEM_DOMAIN_RENDER,
1602 sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1603 pp_context->sampler_state_table.bo_8x8_uv);
1605 dri_bo_unmap(pp_context->sampler_state_table.bo);
1607 /* private function & data */
1608 pp_context->pp_x_steps = pp_avs_x_steps;
1609 pp_context->pp_y_steps = pp_avs_y_steps;
1610 pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1612 pp_avs_context->dest_w = pp_out_w;
1613 pp_avs_context->dest_h = pp_out_h;
1614 pp_avs_context->src_w = w;
1615 pp_avs_context->src_h = h;
/*
 * Kernel parameters: enable NLAS (non-linear anamorphic scaling); the
 * normalized per-pixel scaling steps start at 1/dest and each thread
 * covers a 16x8 block column (M x 1 walker, dest_h/8 blocks).
 */
1617 pp_static_parameter.grf4.r4_2.avs.nlas = 1;
1618 pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
1619 pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
1620 pp_inline_parameter.grf5.block_count_x = 1; /* M x 1 */
1621 pp_inline_parameter.grf5.number_blocks = pp_out_h / 8;
1622 pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1623 pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1624 pp_inline_parameter.grf6.video_step_delta = 0.0;
/*
 * Number of horizontal thread-walker steps for the DNDI pass.
 * NOTE(review): the body is not visible in this excerpt; the DNDI walker
 * is configured as 1 x N below, so presumably this returns 1 — confirm.
 */
1628 pp_dndi_x_steps(void *private_context)
/*
 * Number of vertical thread-walker steps for the DNDI pass: each DNDI
 * block covers 4 destination rows, so step count is dest_h / 4.
 */
1634 pp_dndi_y_steps(void *private_context)
1636 struct pp_dndi_context *pp_dndi_context = private_context;
1638 return pp_dndi_context->dest_h / 4;
/*
 * Per-thread inline parameters for the DNDI kernel: each (x, y) walker
 * step maps to a 16x4-pixel destination block origin.
 */
1642 pp_dndi_set_block_parameter(void *private_context, int x, int y)
1644 pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1645 pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
/*
 * Set up all GPU state for the NV12 DNDI (denoise + deinterlace) pass:
 * source UV/YUV surface states, the persistent STMM (spatial-temporal
 * motion measure) surface, destination Y/UV surfaces backed by the
 * surface's pp_out intermediate, the DNDI sampler state, and the
 * static/inline kernel parameters.
 *
 * ctx         - VA driver context
 * surface     - VA surface to deinterlace/denoise
 * input       - unused in the visible code
 * srcw/srch   - source dimensions (unused in the visible code)
 * destw/desth - destination dimensions (unused in the visible code;
 *               output geometry tracks the source surface)
 */
1651 void pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1652 unsigned short srcw, unsigned short srch,
1653 unsigned short destw, unsigned short desth)
1655 struct i965_driver_data *i965 = i965_driver_data(ctx);
1656 struct i965_post_processing_context *pp_context = i965->pp_context;
1657 struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1658 struct object_surface *obj_surface;
1659 struct i965_surface_state *ss;
1660 struct i965_surface_state2 *ss_dndi;
1661 struct i965_sampler_dndi *sampler_dndi;
1666 unsigned int tiling, swizzle;
1669 obj_surface = SURFACE(surface);
1670 orig_w = obj_surface->orig_width;
1671 orig_h = obj_surface->orig_height;
1672 w = obj_surface->width;
1673 h = obj_surface->height;
/* Lazily allocate the STMM history buffer; it persists across frames. */
1675 if (pp_context->stmm.bo == NULL) {
1676 pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1680 assert(pp_context->stmm.bo);
/* (Re)allocate the intermediate output surface at source geometry. */
1683 dri_bo_unreference(obj_surface->pp_out_bo);
1684 obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1685 "intermediate surface",
1688 assert(obj_surface->pp_out_bo);
1689 obj_surface->orig_pp_out_width = orig_w;
1690 obj_surface->orig_pp_out_height = orig_h;
1691 obj_surface->pp_out_width = w;
1692 obj_surface->pp_out_height = h;
1694 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1695 /* source UV surface index 2 */
1697 pp_context->surfaces[index].s_bo = obj_surface->bo;
1698 dri_bo_reference(pp_context->surfaces[index].s_bo);
1699 bo = dri_bo_alloc(i965->intel.bufmgr,
1701 sizeof(struct i965_surface_state),
1704 pp_context->surfaces[index].ss_bo = bo;
1705 dri_bo_map(bo, True);
1706 assert(bo->virtual);
1708 memset(ss, 0, sizeof(*ss));
1709 ss->ss0.surface_type = I965_SURFACE_2D;
1710 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
/* NV12 UV plane starts right after the w*h Y plane. */
1711 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1712 ss->ss2.width = orig_w / 4 - 1;
1713 ss->ss2.height = orig_h / 2 - 1;
1714 ss->ss3.pitch = w - 1;
1715 pp_set_surface_tiling(ss, tiling);
1716 dri_bo_emit_reloc(bo,
1717 I915_GEM_DOMAIN_RENDER,
1720 offsetof(struct i965_surface_state, ss1),
1721 pp_context->surfaces[index].s_bo);
1724 /* source YUV surface index 4 */
1726 pp_context->surfaces[index].s_bo = obj_surface->bo;
1727 dri_bo_reference(pp_context->surfaces[index].s_bo);
1728 bo = dri_bo_alloc(i965->intel.bufmgr,
1729 "YUV surface state for deinterlace ",
1730 sizeof(struct i965_surface_state2),
1733 pp_context->surfaces[index].ss_bo = bo;
1734 dri_bo_map(bo, True);
1735 assert(bo->virtual);
1736 ss_dndi = bo->virtual;
1737 memset(ss_dndi, 0, sizeof(*ss_dndi));
1738 ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1739 ss_dndi->ss1.cbcr_pixel_offset_v_direction = 0;
1740 ss_dndi->ss1.width = w - 1;
1741 ss_dndi->ss1.height = h - 1;
/* NOTE(review): overrides the 0 written at dw-1739 above — confirm intent. */
1742 ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1;
1743 ss_dndi->ss2.half_pitch_for_chroma = 0;
1744 ss_dndi->ss2.pitch = w - 1;
1745 ss_dndi->ss2.interleave_chroma = 1;
1746 ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1747 ss_dndi->ss2.half_pitch_for_chroma = 0;
1748 ss_dndi->ss2.tiled_surface = 0;
1749 ss_dndi->ss3.x_offset_for_cb = 0;
/* Chroma plane sits h rows below the surface base. */
1750 ss_dndi->ss3.y_offset_for_cb = h;
1751 pp_set_surface2_tiling(ss_dndi, tiling);
1752 dri_bo_emit_reloc(bo,
1753 I915_GEM_DOMAIN_RENDER,
1756 offsetof(struct i965_surface_state2, ss0),
1757 pp_context->surfaces[index].s_bo);
1760 /* source STMM surface index 20 */
1762 pp_context->surfaces[index].s_bo = pp_context->stmm.bo;
1763 dri_bo_reference(pp_context->surfaces[index].s_bo);
1764 bo = dri_bo_alloc(i965->intel.bufmgr,
1765 "STMM surface state for deinterlace ",
1766 sizeof(struct i965_surface_state2),
1769 pp_context->surfaces[index].ss_bo = bo;
1770 dri_bo_map(bo, True);
1771 assert(bo->virtual);
1773 memset(ss, 0, sizeof(*ss));
1774 ss->ss0.surface_type = I965_SURFACE_2D;
1775 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1776 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1777 ss->ss2.width = w - 1;
1778 ss->ss2.height = h - 1;
1779 ss->ss3.pitch = w - 1;
1780 dri_bo_emit_reloc(bo,
1781 I915_GEM_DOMAIN_RENDER,
1782 I915_GEM_DOMAIN_RENDER,
1784 offsetof(struct i965_surface_state, ss1),
1785 pp_context->surfaces[index].s_bo);
1788 /* destination Y surface index 7 */
1790 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1791 dri_bo_reference(pp_context->surfaces[index].s_bo);
1792 bo = dri_bo_alloc(i965->intel.bufmgr,
1794 sizeof(struct i965_surface_state),
1797 pp_context->surfaces[index].ss_bo = bo;
1798 dri_bo_map(bo, True);
1799 assert(bo->virtual);
1801 memset(ss, 0, sizeof(*ss));
1802 ss->ss0.surface_type = I965_SURFACE_2D;
1803 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1804 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1805 ss->ss2.width = w / 4 - 1;
1806 ss->ss2.height = h - 1;
1807 ss->ss3.pitch = w - 1;
1808 dri_bo_emit_reloc(bo,
1809 I915_GEM_DOMAIN_RENDER,
1810 I915_GEM_DOMAIN_RENDER,
1812 offsetof(struct i965_surface_state, ss1),
1813 pp_context->surfaces[index].s_bo);
1816 /* destination UV surface index 8 */
1818 pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1819 dri_bo_reference(pp_context->surfaces[index].s_bo);
1820 bo = dri_bo_alloc(i965->intel.bufmgr,
1822 sizeof(struct i965_surface_state),
1825 pp_context->surfaces[index].ss_bo = bo;
1826 dri_bo_map(bo, True);
1827 assert(bo->virtual);
1829 memset(ss, 0, sizeof(*ss));
1830 ss->ss0.surface_type = I965_SURFACE_2D;
1831 ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1832 ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1833 ss->ss2.width = w / 4 - 1;
1834 ss->ss2.height = h / 2 - 1;
1835 ss->ss3.pitch = w - 1;
1836 dri_bo_emit_reloc(bo,
1837 I915_GEM_DOMAIN_RENDER,
1838 I915_GEM_DOMAIN_RENDER,
1840 offsetof(struct i965_surface_state, ss1),
1841 pp_context->surfaces[index].s_bo);
/* DNDI sampler state (one 8-dword entry). */
1845 dri_bo_map(pp_context->sampler_state_table.bo, True);
1846 assert(pp_context->sampler_state_table.bo->virtual);
1847 assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1848 sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1850 /* sample dndi index 1 */
1852 sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1853 sampler_dndi[index].dw0.denoise_history_delta = 8; // 0-15, default is 8
1854 sampler_dndi[index].dw0.denoise_maximum_history = 128; // 128-240
1855 sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1857 sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1858 sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1859 sampler_dndi[index].dw1.stmm_c2 = 0;
1860 sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1861 sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1863 sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15; // 0-31
1864 sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7; // 0-15
1865 sampler_dndi[index].dw2.denoise_edge_threshold = 7; // 0-15
1866 sampler_dndi[index].dw2.good_neighbor_threshold = 7; // 0-63
1868 sampler_dndi[index].dw3.maximum_stmm = 128;
1869 sampler_dndi[index].dw3.multipler_for_vecm = 2;
1870 sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1871 sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1872 sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1874 sampler_dndi[index].dw4.sdi_delta = 8;
1875 sampler_dndi[index].dw4.sdi_threshold = 128;
1876 sampler_dndi[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift
1877 sampler_dndi[index].dw4.stmm_shift_up = 0;
1878 sampler_dndi[index].dw4.stmm_shift_down = 0;
1879 sampler_dndi[index].dw4.minimum_stmm = 0;
1881 sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1882 sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1883 sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1884 sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
/* Enable both denoise and deinterlace; treat every frame as the first. */
1886 sampler_dndi[index].dw6.dn_enable = 1;
1887 sampler_dndi[index].dw6.di_enable = 1;
1888 sampler_dndi[index].dw6.di_partial = 0;
1889 sampler_dndi[index].dw6.dndi_top_first = 1;
1890 sampler_dndi[index].dw6.dndi_stream_id = 1;
1891 sampler_dndi[index].dw6.dndi_first_frame = 1;
1892 sampler_dndi[index].dw6.progressive_dn = 0;
1893 sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1894 sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1895 sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1897 sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1898 sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1899 sampler_dndi[index].dw7.vdi_walker_enable = 0;
1900 sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1902 dri_bo_unmap(pp_context->sampler_state_table.bo);
1904 /* private function & data */
1905 pp_context->pp_x_steps = pp_dndi_x_steps;
1906 pp_context->pp_y_steps = pp_dndi_y_steps;
1907 pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1909 pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1910 pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1911 pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1912 pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
/* DNDI walker runs 1 x N: one row of w/16 blocks per vertical step. */
1914 pp_inline_parameter.grf5.block_count_x = w / 16; /* 1 x N */
1915 pp_inline_parameter.grf5.number_blocks = w / 16;
1916 pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1917 pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1919 pp_dndi_context->dest_w = w;
1920 pp_dndi_context->dest_h = h;
/*
 * (Re)allocate every per-run GPU object the Ironlake post-processing
 * pipeline needs — CURBE, binding table, interface descriptors, sampler
 * state tables, VFE state — clear the surface slots and kernel
 * parameters, select the requested PP module, and run its
 * module-specific initialize hook.
 */
1924 ironlake_pp_initialize(VADriverContextP ctx,
1925 VASurfaceID surface,
1929 unsigned short srcw,
1930 unsigned short srch,
1933 unsigned short destw,
1934 unsigned short desth,
1937 struct i965_driver_data *i965 = i965_driver_data(ctx);
1938 struct i965_post_processing_context *pp_context = i965->pp_context;
1939 struct pp_module *pp_module;
/* Drop last run's buffers and allocate fresh ones. */
1943 dri_bo_unreference(pp_context->curbe.bo);
1944 bo = dri_bo_alloc(i965->intel.bufmgr,
1949 pp_context->curbe.bo = bo;
1951 dri_bo_unreference(pp_context->binding_table.bo);
1952 bo = dri_bo_alloc(i965->intel.bufmgr,
1954 sizeof(unsigned int),
1957 pp_context->binding_table.bo = bo;
1959 dri_bo_unreference(pp_context->idrt.bo);
1960 bo = dri_bo_alloc(i965->intel.bufmgr,
1961 "interface discriptor",
1962 sizeof(struct i965_interface_descriptor),
1965 pp_context->idrt.bo = bo;
1966 pp_context->idrt.num_interface_descriptors = 0;
1968 dri_bo_unreference(pp_context->sampler_state_table.bo);
1969 bo = dri_bo_alloc(i965->intel.bufmgr,
1970 "sampler state table",
/* The sampler state table is pre-zeroed so unused entries are inert. */
1974 dri_bo_map(bo, True);
1975 memset(bo->virtual, 0, bo->size);
1977 pp_context->sampler_state_table.bo = bo;
1979 dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1980 bo = dri_bo_alloc(i965->intel.bufmgr,
1981 "sampler 8x8 state ",
1985 pp_context->sampler_state_table.bo_8x8 = bo;
1987 dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1988 bo = dri_bo_alloc(i965->intel.bufmgr,
1989 "sampler 8x8 state ",
1993 pp_context->sampler_state_table.bo_8x8_uv = bo;
1995 dri_bo_unreference(pp_context->vfe_state.bo);
1996 bo = dri_bo_alloc(i965->intel.bufmgr,
1998 sizeof(struct i965_vfe_state),
2001 pp_context->vfe_state.bo = bo;
/* Release all surface-slot references from the previous run. */
2003 for (i = 0; i < MAX_PP_SURFACES; i++) {
2004 dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2005 pp_context->surfaces[i].ss_bo = NULL;
2007 dri_bo_unreference(pp_context->surfaces[i].s_bo);
2008 pp_context->surfaces[i].s_bo = NULL;
/* Start each run from zeroed kernel parameters. */
2011 memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
2012 memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
2013 assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
2015 pp_context->current_pp = pp_index;
2016 pp_module = &pp_modules[pp_index];
2018 if (pp_module->initialize)
2019 pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
/*
 * Top-level Ironlake post-processing entry point: initialize per-run
 * state for the selected PP module, then emit the state setup and the
 * media pipeline into the batchbuffer.
 */
2023 ironlake_post_processing(VADriverContextP ctx,
2024 VASurfaceID surface,
2028 unsigned short srcw,
2029 unsigned short srch,
2032 unsigned short destw,
2033 unsigned short desth,
2036 ironlake_pp_initialize(ctx, surface, input,
2037 srcx, srcy, srcw, srch,
2038 destx, desty, destw, desth,
2040 ironlake_pp_states_setup(ctx);
2041 ironlake_pp_pipeline_setup(ctx);
/*
 * Gen6 (Sandy Bridge) counterpart of ironlake_pp_initialize: (re)allocate
 * the CURBE, binding table, interface descriptors (gen6 layout), sampler
 * state tables and VFE state, reset surface slots and kernel parameters,
 * then dispatch the selected PP module's initialize hook.
 */
2045 gen6_pp_initialize(VADriverContextP ctx,
2046 VASurfaceID surface,
2050 unsigned short srcw,
2051 unsigned short srch,
2054 unsigned short destw,
2055 unsigned short desth,
2058 struct i965_driver_data *i965 = i965_driver_data(ctx);
2059 struct i965_post_processing_context *pp_context = i965->pp_context;
2060 struct pp_module *pp_module;
2064 dri_bo_unreference(pp_context->curbe.bo);
2065 bo = dri_bo_alloc(i965->intel.bufmgr,
2070 pp_context->curbe.bo = bo;
2072 dri_bo_unreference(pp_context->binding_table.bo);
2073 bo = dri_bo_alloc(i965->intel.bufmgr,
2075 sizeof(unsigned int),
2078 pp_context->binding_table.bo = bo;
2080 dri_bo_unreference(pp_context->idrt.bo);
/* Gen6 uses gen6_interface_descriptor_data, unlike the Ironlake path. */
2081 bo = dri_bo_alloc(i965->intel.bufmgr,
2082 "interface discriptor",
2083 sizeof(struct gen6_interface_descriptor_data),
2086 pp_context->idrt.bo = bo;
2087 pp_context->idrt.num_interface_descriptors = 0;
2089 dri_bo_unreference(pp_context->sampler_state_table.bo);
2090 bo = dri_bo_alloc(i965->intel.bufmgr,
2091 "sampler state table",
/* Pre-zero the sampler state table so unused entries are inert. */
2095 dri_bo_map(bo, True);
2096 memset(bo->virtual, 0, bo->size);
2098 pp_context->sampler_state_table.bo = bo;
2100 dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2101 bo = dri_bo_alloc(i965->intel.bufmgr,
2102 "sampler 8x8 state ",
2106 pp_context->sampler_state_table.bo_8x8 = bo;
2108 dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2109 bo = dri_bo_alloc(i965->intel.bufmgr,
2110 "sampler 8x8 state ",
2114 pp_context->sampler_state_table.bo_8x8_uv = bo;
2116 dri_bo_unreference(pp_context->vfe_state.bo);
2117 bo = dri_bo_alloc(i965->intel.bufmgr,
2119 sizeof(struct i965_vfe_state),
2122 pp_context->vfe_state.bo = bo;
/* Release all surface-slot references from the previous run. */
2124 for (i = 0; i < MAX_PP_SURFACES; i++) {
2125 dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2126 pp_context->surfaces[i].ss_bo = NULL;
2128 dri_bo_unreference(pp_context->surfaces[i].s_bo);
2129 pp_context->surfaces[i].s_bo = NULL;
2132 memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
2133 memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
2134 assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
2136 pp_context->current_pp = pp_index;
2137 pp_module = &pp_modules[pp_index];
2139 if (pp_module->initialize)
2140 pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
/*
 * Fill the Gen6 binding table: one dword per PP surface slot, pointing
 * at that slot's surface-state bo (relocated), with unused slots zeroed.
 */
2144 gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
2146 unsigned int *binding_table;
2147 dri_bo *bo = pp_context->binding_table.bo;
2151 assert(bo->virtual);
2152 binding_table = bo->virtual;
2153 memset(binding_table, 0, bo->size);
2155 for (i = 0; i < MAX_PP_SURFACES; i++) {
2156 if (pp_context->surfaces[i].ss_bo) {
/* A populated ss_bo must always come with its backing surface bo. */
2157 assert(pp_context->surfaces[i].s_bo);
2159 binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
2160 dri_bo_emit_reloc(bo,
2161 I915_GEM_DOMAIN_INSTRUCTION, 0,
2163 i * sizeof(*binding_table),
2164 pp_context->surfaces[i].ss_bo);
/*
 * Build one Gen6 interface descriptor for the currently selected PP
 * kernel: kernel entry point, sampler state pointer, binding table
 * pointer and CURBE read length, each backed by a relocation so the
 * addresses are patched at execution time.
 */
2173 gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
2175 struct gen6_interface_descriptor_data *desc;
2177 int pp_index = pp_context->current_pp;
2179 bo = pp_context->idrt.bo;
2180 dri_bo_map(bo, True);
2181 assert(bo->virtual);
2183 memset(desc, 0, sizeof(*desc));
/* Kernel start pointer is 64-byte aligned (offset >> 6). */
2184 desc->desc0.kernel_start_pointer =
2185 pp_modules[pp_index].bo->offset >> 6; /* reloc */
2186 desc->desc1.single_program_flow = 1;
2187 desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
2188 desc->desc2.sampler_count = 1; /* 1 - 4 samplers used */
/* Sampler/binding-table pointers are 32-byte aligned (offset >> 5). */
2189 desc->desc2.sampler_state_pointer =
2190 pp_context->sampler_state_table.bo->offset >> 5;
2191 desc->desc3.binding_table_entry_count = 0;
2192 desc->desc3.binding_table_pointer =
2193 pp_context->binding_table.bo->offset >> 5; /*reloc */
2194 desc->desc4.constant_urb_entry_read_offset = 0;
2195 desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
2197 dri_bo_emit_reloc(bo,
2198 I915_GEM_DOMAIN_INSTRUCTION, 0,
2200 offsetof(struct gen6_interface_descriptor_data, desc0),
2201 pp_modules[pp_index].bo);
2203 dri_bo_emit_reloc(bo,
2204 I915_GEM_DOMAIN_INSTRUCTION, 0,
2205 desc->desc2.sampler_count << 2,
2206 offsetof(struct gen6_interface_descriptor_data, desc2),
2207 pp_context->sampler_state_table.bo)
2209 dri_bo_emit_reloc(bo,
2210 I915_GEM_DOMAIN_INSTRUCTION, 0,
2211 desc->desc3.binding_table_entry_count,
2212 offsetof(struct gen6_interface_descriptor_data, desc3),
2213 pp_context->binding_table.bo);
2216 pp_context->idrt.num_interface_descriptors++;
/*
 * Copy the 128-byte static kernel parameter block (grf 1-4) into the
 * CURBE buffer so it is pushed as constants to the PP kernel.
 */
2220 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
2222 unsigned char *constant_buffer;
2224 assert(sizeof(pp_static_parameter) == 128);
2225 dri_bo_map(pp_context->curbe.bo, 1);
2226 assert(pp_context->curbe.bo->virtual);
2227 constant_buffer = pp_context->curbe.bo->virtual;
2228 memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
2229 dri_bo_unmap(pp_context->curbe.bo);
/* Prepares all indirect GPU state for a GEN6 PP pass: binding table,
 * interface descriptor table and CURBE constants, in that order.
 * NOTE(review): extraction gaps — prologue/closing brace not visible. */
2233 gen6_pp_states_setup(VADriverContextP ctx)
2235     struct i965_driver_data *i965 = i965_driver_data(ctx);
2236     struct i965_post_processing_context *pp_context = i965->pp_context;
2238     gen6_pp_binding_table(pp_context);
2239     gen6_pp_interface_descriptor_table(pp_context);
2240     gen6_pp_upload_constants(pp_context);
/* Emits PIPELINE_SELECT to switch the GPU to the media pipeline before
 * any media state/commands are issued.
 * NOTE(review): extraction gaps — prologue and the matching
 * ADVANCE_BATCH/closing brace are not visible here. */
2244 gen6_pp_pipeline_select(VADriverContextP ctx)
2246     BEGIN_BATCH(ctx, 1);
2247     OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
/* Emits a 10-dword CMD_STATE_BASE_ADDRESS with every base address set to
 * zero (plus the MODIFY bit), i.e. all indirect state pointers are treated
 * as absolute graphics addresses patched by relocations.
 * NOTE(review): extraction gaps — prologue and ADVANCE_BATCH/closing brace
 * are not visible here. */
2254 gen6_pp_state_base_address(VADriverContextP ctx)
2255     BEGIN_BATCH(ctx, 10);
/* DWord length field encodes (total dwords - 2). */
2256     OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | (10 - 2));
2257     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2258     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2259     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2260     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2261     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2262     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2263     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2264     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2265     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
/* Emits CMD_MEDIA_VFE_STATE programming the VFE URB configuration from the
 * values chosen in i965_post_processing_init (entry counts and sizes).
 * NOTE(review): heavy extraction gaps — several OUT_BATCH dwords between
 * the visible ones, plus the prologue and ADVANCE_BATCH, are missing; do
 * not assume the visible dwords are consecutive in the real command. */
2269 gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2271     BEGIN_BATCH(ctx, 8);
2272     OUT_BATCH(ctx, CMD_MEDIA_VFE_STATE | (8 - 2));
/* Max-threads-style fields derived from num_vfe_entries — presumably
 * matching the GEN6 MEDIA_VFE_STATE layout; confirm against the PRM. */
2275               (pp_context->urb.num_vfe_entries - 1) << 16 |
2276               pp_context->urb.num_vfe_entries << 8);
2279               (pp_context->urb.size_vfe_entry * 2) << 16 |    /* in 256 bits unit */
2280               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1)); /* in 256 bits unit */
/* Emits CMD_MEDIA_CURBE_LOAD pointing the constant URB at the CURBE bo
 * filled by gen6_pp_upload_constants.
 * NOTE(review): extraction gaps — the prologue, at least one OUT_BATCH
 * dword, the OUT_RELOC wrapper around the visible bo arguments and
 * ADVANCE_BATCH are missing from this view. */
2288 gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
/* CURBE length (CS entries * entry size, scaled to bytes) must fit in
 * the allocated bo. The *512 scale factor matches the URB size units
 * used at init — confirm the exact unit against the PRM. */
2290     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);
2292     BEGIN_BATCH(ctx, 4);
2293     OUT_BATCH(ctx, CMD_MEDIA_CURBE_LOAD | (4 - 2));
2296               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
2298               pp_context->curbe.bo,
2299               I915_GEM_DOMAIN_INSTRUCTION, 0,
/* Emits CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD so the GPU reads the IDRT
 * built by gen6_pp_interface_descriptor_table; the length dword is the
 * descriptor count times the per-descriptor size in bytes.
 * NOTE(review): extraction gaps — prologue, an OUT_BATCH dword, the
 * OUT_RELOC wrapper and ADVANCE_BATCH are missing from this view. */
2305 gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2307     BEGIN_BATCH(ctx, 4);
2308     OUT_BATCH(ctx, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
2311               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
2313               pp_context->idrt.bo,
2314               I915_GEM_DOMAIN_INSTRUCTION, 0,
/* Walks the output surface block grid: for every (x, y) step accepted by
 * pp_set_block_parameter, emits one CMD_MEDIA_OBJECT carrying the 64-byte
 * inline parameter block (GRF 5-6) that positions the kernel on that block.
 * NOTE(review): extraction gaps — the prologue, several OUT_BATCH dwords
 * inside the MEDIA_OBJECT, ADVANCE_BATCH and the loop/function closing
 * braces are not visible here. */
2320 gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2322     int x, x_steps, y, y_steps;
/* Step counts come from per-kernel callbacks installed at initialize
 * time; private_context holds that kernel's walker state. */
2324     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
2325     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
2327     for (y = 0; y < y_steps; y++) {
2328         for (x = 0; x < x_steps; x++) {
/* Callback returns 0 when this (x, y) block should be processed;
 * it also updates pp_inline_parameter for the block. */
2329             if (!pp_context->pp_set_block_parameter(&pp_context->private_context, x, y)) {
2330                 BEGIN_BATCH(ctx, 22);
/* 22 dwords total: header dword + 20 encoded in the length field
 * + inline data follows via intel_batchbuffer_data. */
2331                 OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 20);
2333                 OUT_BATCH(ctx, 0); /* no indirect data */
2335                 OUT_BATCH(ctx, 0); /* scoreboard */
2338                 /* inline data grf 5-6 */
2339                 assert(sizeof(pp_inline_parameter) == 64);
2340                 intel_batchbuffer_data(ctx, &pp_inline_parameter, sizeof(pp_inline_parameter));
/* Assembles the full GEN6 PP batch inside an atomic batchbuffer section:
 * flush, pipeline select, CURBE load, descriptor load, base addresses,
 * VFE state, then the MEDIA_OBJECT walker.
 * NOTE(review): extraction gaps — prologue/closing brace not visible. */
2349 gen6_pp_pipeline_setup(VADriverContextP ctx)
2351     struct i965_driver_data *i965 = i965_driver_data(ctx);
2352     struct i965_post_processing_context *pp_context = i965->pp_context;
/* Reserve 0x1000 bytes atomically so the whole sequence lands in one
 * batch without an intervening flush. */
2354     intel_batchbuffer_start_atomic(ctx, 0x1000);
2355     intel_batchbuffer_emit_mi_flush(ctx);
2356     gen6_pp_pipeline_select(ctx);
2357     gen6_pp_curbe_load(ctx, pp_context);
2358     gen6_interface_descriptor_load(ctx, pp_context);
2359     gen6_pp_state_base_address(ctx);
2360     gen6_pp_vfe_state(ctx, pp_context);
2361     gen6_pp_object_walker(ctx, pp_context);
2362     intel_batchbuffer_end_atomic(ctx);
/* GEN6 (Sandy Bridge) post-processing entry: initializes the selected PP
 * kernel for the surface/rects, uploads states, then emits the pipeline.
 * NOTE(review): extraction gaps — several parameters (srcx/srcy,
 * destx/desty, the pp_index-style argument), the '{' and '}' are not
 * visible in this view; the parameter comments below are inferred from
 * the call sites and should be confirmed. */
2366 gen6_post_processing(VADriverContextP ctx,
2367                      VASurfaceID surface,
2371                      unsigned short srcw,
2372                      unsigned short srch,
2375                      unsigned short destw,
2376                      unsigned short desth,
2379     gen6_pp_initialize(ctx, surface, input,
2380                        srcx, srcy, srcw, srch,
2381                        destx, desty, destw, desth,
2383     gen6_pp_states_setup(ctx);
2384     gen6_pp_pipeline_setup(ctx);
/* Generation dispatch: routes a PP request to the GEN6 path or the
 * Ironlake (GEN5) path based on the PCI device id.
 * NOTE(review): extraction gaps — some parameters, the '{'/'}' and the
 * `else` keyword before the ironlake call are not visible here. */
2388 i965_post_processing_internal(VADriverContextP ctx,
2389                               VASurfaceID surface,
2393                               unsigned short srcw,
2394                               unsigned short srch,
2397                               unsigned short destw,
2398                               unsigned short desth,
2401     struct i965_driver_data *i965 = i965_driver_data(ctx);
2403     if (IS_GEN6(i965->intel.device_id))
2404         gen6_post_processing(ctx, surface, input,
2405                              srcx, srcy, srcw, srch,
2406                              destx, desty, destw, desth,
/* Fallback path — presumably guarded by an `else` on a missing line. */
2409         ironlake_post_processing(ctx, surface, input,
2410                                  srcx, srcy, srcw, srch,
2411                                  destx, desty, destw, desth,
/* Public post-processing entry point. Only NV12 (interleaved-UV) surfaces
 * are supported; runs the deinterlacing pass and/or the AVS scaling pass
 * depending on the flag bits, with internal_input chaining the passes
 * (presumably set between them on a line missing from this extraction).
 * NOTE(review): extraction gaps — some parameters, the flag argument
 * declaration and several closing braces are not visible here. */
2416 i965_post_processing(VADriverContextP ctx,
2417                      VASurfaceID surface,
2420                      unsigned short srcw,
2421                      unsigned short srch,
2424                      unsigned short destw,
2425                      unsigned short desth,
2428     struct i965_driver_data *i965 = i965_driver_data(ctx);
2431     /* Currently only support post processing for NV12 surface */
2432     if (i965->render_state.interleaved_uv) {
/* 0 = read from the decoded surface; later passes presumably flip
 * this to consume the previous pass's output — confirm against the
 * missing lines. */
2433         int internal_input = 0;
2435         if (flag & I965_PP_FLAG_DEINTERLACING) {
2436             i965_post_processing_internal(ctx, surface, internal_input,
2437                                           srcx, srcy, srcw, srch,
2438                                           destx, desty, destw, desth,
2443         if (flag & I965_PP_FLAG_AVS) {
2444             i965_post_processing_internal(ctx, surface, internal_input,
2445                                           srcx, srcy, srcw, srch,
2446                                           destx, desty, destw, desth,
/* Tears down the PP context: drops every buffer object it owns (CURBE,
 * per-surface state/storage bos, sampler tables, binding table, IDRT,
 * VFE state, STMM), frees the context, and unreferences the shared kernel
 * bos in pp_modules[]. dri_bo_unreference(NULL) is a no-op, so the
 * unconditional unrefs are safe on never-allocated members.
 * NOTE(review): extraction gaps — the prologue, the loop variable
 * declaration, the NULL-check/free() around pp_context and several
 * closing braces are not visible here. */
2454 i965_post_processing_terminate(VADriverContextP ctx)
2456     struct i965_driver_data *i965 = i965_driver_data(ctx);
2457     struct i965_post_processing_context *pp_context = i965->pp_context;
2462         dri_bo_unreference(pp_context->curbe.bo);
2463         pp_context->curbe.bo = NULL;
2465         for (i = 0; i < MAX_PP_SURFACES; i++) {
2466             dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2467             pp_context->surfaces[i].ss_bo = NULL;
2469             dri_bo_unreference(pp_context->surfaces[i].s_bo);
2470             pp_context->surfaces[i].s_bo = NULL;
2473         dri_bo_unreference(pp_context->sampler_state_table.bo);
2474         pp_context->sampler_state_table.bo = NULL;
2476         dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2477         pp_context->sampler_state_table.bo_8x8 = NULL;
2479         dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2480         pp_context->sampler_state_table.bo_8x8_uv = NULL;
2482         dri_bo_unreference(pp_context->binding_table.bo);
2483         pp_context->binding_table.bo = NULL;
2485         dri_bo_unreference(pp_context->idrt.bo);
2486         pp_context->idrt.bo = NULL;
2487         pp_context->idrt.num_interface_descriptors = 0;
2489         dri_bo_unreference(pp_context->vfe_state.bo);
2490         pp_context->vfe_state.bo = NULL;
2492         dri_bo_unreference(pp_context->stmm.bo);
2493         pp_context->stmm.bo = NULL;
/* Clear the driver-level pointer so a later init reallocates. */
2498     i965->pp_context = NULL;
/* Kernel bos are shared across contexts via the global pp_modules
 * table (gen5 or gen6 variant); guard handles pp_modules == NULL. */
2500     for (i = 0; i < NUM_PP_MODULES && pp_modules; i++) {
2501         struct pp_module *pp_module = &pp_modules[i];
2503         dri_bo_unreference(pp_module->bo);
2504         pp_module->bo = NULL;
2512 i965_post_processing_init(VADriverContextP ctx)
2514 struct i965_driver_data *i965 = i965_driver_data(ctx);
2515 struct i965_post_processing_context *pp_context = i965->pp_context;
2519 if (pp_context == NULL) {
2520 pp_context = calloc(1, sizeof(*pp_context));
2521 i965->pp_context = pp_context;
2524 pp_context->urb.size = URB_SIZE((&i965->intel));
2525 pp_context->urb.num_vfe_entries = 32;
2526 pp_context->urb.size_vfe_entry = 1; /* in 512 bits unit */
2527 pp_context->urb.num_cs_entries = 1;
2528 pp_context->urb.size_cs_entry = 2; /* in 512 bits unit */
2529 pp_context->urb.vfe_start = 0;
2530 pp_context->urb.cs_start = pp_context->urb.vfe_start +
2531 pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2532 assert(pp_context->urb.cs_start +
2533 pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2535 assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2537 if (IS_GEN6(i965->intel.device_id))
2538 pp_modules = pp_modules_gen6;
2539 else if (IS_IRONLAKE(i965->intel.device_id)) {
2540 pp_modules = pp_modules_gen5;
2543 for (i = 0; i < NUM_PP_MODULES && pp_modules; i++) {
2544 struct pp_module *pp_module = &pp_modules[i];
2545 dri_bo_unreference(pp_module->bo);
2546 pp_module->bo = dri_bo_alloc(i965->intel.bufmgr,
2550 assert(pp_module->bo);
2551 dri_bo_subdata(pp_module->bo, 0, pp_module->size, pp_module->bin);