4dba0c6bb7cd6d34cee8b6a449cb0b684c93f49a
[platform/upstream/libva.git] / i965_drv_video / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
40
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
43 #include "i965_drv_video.h"
44
/*
 * Non-zero when the device id stored in ctx->intel.device_id is matched by
 * IS_IRONLAKE() or IS_GEN6().  ctx must therefore point at an object with
 * an `intel.device_id` member (presumably the i965 driver data; the name
 * suggests "has post-processing support" -- confirm against the callers).
 */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id))
47
/*
 * One post-processing module: the kernel description plus the callback
 * that prepares the driver state for running that kernel on a surface.
 */
struct pp_module
{
    /* Kernel description (name, id, binary, size, buffer object slot). */
    struct i965_kernel kernel;

    /* others */
    /*
     * Per-module setup hook.  Its signature matches the pp_*_initialize()
     * functions declared in this file.
     */
    void (*initialize)(VADriverContextP ctx, VASurfaceID surface, int input,
                       unsigned short srcw, unsigned short srch,
                       unsigned short destw, unsigned short desth);
};
57
/*
 * Kernel binaries for the gen5 module table.  Each array is filled from a
 * generated shader file and holds rows of four 32-bit words.
 */
static const uint32_t pp_null_gen5[][4] = {
#include "shaders/post_processing/null.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_gen5[][4] = {
#include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_scaling_gen5[][4] = {
#include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_avs_gen5[][4] = {
#include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_dndi_gen5[][4] = {
#include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
};
77
/*
 * Forward declarations of the per-module initialize callbacks, needed
 * because the module tables below take their addresses before the
 * functions are defined.
 */
static void pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
                               unsigned short srcw, unsigned short srch,
                               unsigned short destw, unsigned short desth);
static void pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
                                   unsigned short srcw, unsigned short srch,
                                   unsigned short destw, unsigned short desth);
static void pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
                                       unsigned short srcw, unsigned short srch,
                                       unsigned short destw, unsigned short desth);
static void pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
                                         unsigned short srcw, unsigned short srch,
                                         unsigned short destw, unsigned short desth);
static void pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
                                    unsigned short srcw, unsigned short srch,
                                    unsigned short destw, unsigned short desth);
93
94 static struct pp_module pp_modules_gen5[] = {
95     {
96         {
97             "NULL module (for testing)",
98             PP_NULL,
99             pp_null_gen5,
100             sizeof(pp_null_gen5),
101             NULL,
102         },
103
104         pp_null_initialize,
105     },
106
107     {
108         {
109             "NV12 Load & Save module",
110             PP_NV12_LOAD_SAVE,
111             pp_nv12_load_save_gen5,
112             sizeof(pp_nv12_load_save_gen5),
113             NULL,
114         },
115
116         pp_nv12_load_save_initialize,
117     },
118
119     {
120         {
121             "NV12 Scaling module",
122             PP_NV12_SCALING,
123             pp_nv12_scaling_gen5,
124             sizeof(pp_nv12_scaling_gen5),
125             NULL,
126         },
127
128         pp_nv12_scaling_initialize,
129     },
130
131     {
132         {
133             "NV12 AVS module",
134             PP_NV12_AVS,
135             pp_nv12_avs_gen5,
136             sizeof(pp_nv12_avs_gen5),
137             NULL,
138         },
139
140         pp_nv12_avs_initialize,
141     },
142
143     {
144         {
145             "NV12 DNDI module",
146             PP_NV12_DNDI,
147             pp_nv12_dndi_gen5,
148             sizeof(pp_nv12_dndi_gen5),
149             NULL,
150         },
151
152         pp_nv12_dndi_initialize,
153     },
154 };
155
/*
 * Kernel binaries for the gen6 module table.  Each array is filled from a
 * generated shader file and holds rows of four 32-bit words.
 */
static const uint32_t pp_null_gen6[][4] = {
#include "shaders/post_processing/null.g6b"
};

static const uint32_t pp_nv12_load_save_gen6[][4] = {
#include "shaders/post_processing/nv12_load_save_nv12.g6b"
};

static const uint32_t pp_nv12_scaling_gen6[][4] = {
#include "shaders/post_processing/nv12_scaling_nv12.g6b"
};

static const uint32_t pp_nv12_avs_gen6[][4] = {
#include "shaders/post_processing/nv12_avs_nv12.g6b"
};

static const uint32_t pp_nv12_dndi_gen6[][4] = {
#include "shaders/post_processing/nv12_dndi_nv12.g6b"
};
175
176 static struct pp_module pp_modules_gen6[] = {
177     {
178         {
179             "NULL module (for testing)",
180             PP_NULL,
181             pp_null_gen6,
182             sizeof(pp_null_gen6),
183             NULL,
184         },
185
186         pp_null_initialize,
187     },
188
189     {
190         {
191             "NV12 Load & Save module",
192             PP_NV12_LOAD_SAVE,
193             pp_nv12_load_save_gen6,
194             sizeof(pp_nv12_load_save_gen6),
195             NULL,
196         },
197
198         pp_nv12_load_save_initialize,
199     },
200
201     {
202         {
203             "NV12 Scaling module",
204             PP_NV12_SCALING,
205             pp_nv12_scaling_gen6,
206             sizeof(pp_nv12_scaling_gen6),
207             NULL,
208         },
209
210         pp_nv12_scaling_initialize,
211     },
212
213     {
214         {
215             "NV12 AVS module",
216             PP_NV12_AVS,
217             pp_nv12_avs_gen6,
218             sizeof(pp_nv12_avs_gen6),
219             NULL,
220         },
221
222         pp_nv12_avs_initialize,
223     },
224
225     {
226         {
227             "NV12 DNDI module",
228             PP_NV12_DNDI,
229             pp_nv12_dndi_gen6,
230             sizeof(pp_nv12_dndi_gen6),
231             NULL,
232         },
233
234         pp_nv12_dndi_initialize,
235     },
236 };
237
/*
 * Number of entries in a module table.  It is computed from the gen5
 * table only; the gen6 table in this file has the same number of entries.
 */
#define NUM_PP_MODULES ARRAY_ELEMS(pp_modules_gen5)

/*
 * Module table currently in use; NULL until something assigns it
 * (presumably one of the two tables above, chosen per device -- the
 * assignment is not in this part of the file).
 */
static struct pp_module *pp_modules = NULL;
241
/*
 * Static kernel parameters, laid out as four register rows (grf1..grf4) of
 * eight 32-bit slots each; the rN.M comments name the slot a member lands
 * in.  ironlake_pp_upload_constants() asserts that the whole object is
 * 128 bytes and copies it verbatim into the constant buffer, so the layout
 * must not change.
 */
struct pp_static_parameter
{
    struct {
        /* Procamp r1.0 */
        float procamp_constant_c0;

        /* Load and Save r1.1 */
        unsigned int source_packed_y_offset:8;
        unsigned int source_packed_u_offset:8;
        unsigned int source_packed_v_offset:8;
        unsigned int pad0:8;

        union {
            /* Load and Save r1.2 */
            struct {
                unsigned int destination_packed_y_offset:8;
                unsigned int destination_packed_u_offset:8;
                unsigned int destination_packed_v_offset:8;
                unsigned int pad0:8;
            } load_and_save;

            /* CSC r1.2 */
            struct {
                unsigned int destination_rgb_format:8;
                unsigned int pad0:24;
            } csc;
        } r1_2;

        /* Procamp r1.3 */
        float procamp_constant_c1;

        /* Procamp r1.4 */
        float procamp_constant_c2;

        /* DI r1.5 */
        unsigned int statistics_surface_picth:16;  /* Divided by 2 */
        unsigned int pad1:16;

        union {
            /* DI r1.6 */
            struct {
                unsigned int pad0:24;
                unsigned int top_field_first:8;
            } di;

            /* AVS/Scaling r1.6 */
            float normalized_video_y_scaling_step;
        } r1_6;

        /* Procamp r1.7 */
        float procamp_constant_c5;
    } grf1;

    struct {
        /* Procamp r2.0 */
        float procamp_constant_c3;

        /* MBZ r2.1 */
        unsigned int pad0;

        /* WG+CSC r2.2 */
        float wg_csc_constant_c4;

        /* WG+CSC r2.3 */
        float wg_csc_constant_c8;

        /* Procamp r2.4 */
        float procamp_constant_c4;

        /* MBZ r2.5 */
        unsigned int pad1;

        /* MBZ r2.6 */
        unsigned int pad2;

        /* WG+CSC r2.7 */
        float wg_csc_constant_c9;
    } grf2;

    struct {
        /* WG+CSC r3.0 */
        float wg_csc_constant_c0;

        /* Blending r3.1 */
        float scaling_step_ratio;

        /* Blending r3.2 */
        float normalized_alpha_y_scaling;

        /* WG+CSC r3.3 */
        float wg_csc_constant_c4;

        /* WG+CSC r3.4 */
        float wg_csc_constant_c1;

        /* ALL r3.5 */
        int horizontal_origin_offset:16;
        int vertical_origin_offset:16;

        /* Shared r3.6 */
        union {
            /* Color fill */
            unsigned int color_pixel;

            /* WG+CSC */
            float wg_csc_constant_c2;
        } r3_6;

        /* WG+CSC r3.7 */
        float wg_csc_constant_c3;
    } grf3;

    struct {
        /* WG+CSC r4.0 */
        float wg_csc_constant_c6;

        /* ALL r4.1 MBZ ??? */
        unsigned int pad0;

        /* Shared r4.2 */
        union {
            /* AVS */
            struct {
                unsigned int pad1:15;
                unsigned int nlas:1;
                unsigned int pad2:16;
            } avs;

            /* DI */
            struct {
                unsigned int motion_history_coefficient_m2:8;
                unsigned int motion_history_coefficient_m1:8;
                unsigned int pad0:16;
            } di;
        } r4_2;

        /* WG+CSC r4.3 */
        float wg_csc_constant_c7;

        /* WG+CSC r4.4 */
        float wg_csc_constant_c10;

        /* AVS r4.5 */
        float source_video_frame_normalized_horizontal_origin;

        /* MBZ r4.6 */
        unsigned int pad1;

        /* WG+CSC r4.7 */
        float wg_csc_constant_c11;
    } grf4;
};
394
/*
 * Per-block kernel parameters, laid out as two register rows (grf5, grf6)
 * of eight 32-bit slots each.  ironlake_pp_object_walker() asserts that
 * the object is 64 bytes and emits it as the inline data of every media
 * object command, so the layout must not change.
 */
struct pp_inline_parameter
{
    struct {
        /* ALL r5.0 */
        int destination_block_horizontal_origin:16;
        int destination_block_vertical_origin:16;

        /* Shared r5.1 */
        union {
            /* AVS/Scaling */
            float source_surface_block_normalized_horizontal_origin;

            /* FMD */
            struct {
                unsigned int variance_surface_vertical_origin:16;
                unsigned int pad0:16;
            } fmd;
        } r5_1;

        /* AVS/Scaling r5.2 */
        float source_surface_block_normalized_vertical_origin;

        /* Alpha r5.3 */
        float alpha_surface_block_normalized_horizontal_origin;

        /* Alpha r5.4 */
        float alpha_surface_block_normalized_vertical_origin;

        /* Alpha r5.5 */
        unsigned int alpha_mask_x:16;
        unsigned int alpha_mask_y:8;
        unsigned int block_count_x:8;

        /* r5.6 */
        unsigned int block_horizontal_mask:16;
        unsigned int block_vertical_mask:8;
        unsigned int number_blocks:8;

        /* AVS/Scaling r5.7 */
        float normalized_video_x_scaling_step;
    } grf5;

    struct {
        /* AVS r6.0 */
        float video_step_delta;

        /* r6.1-r6.7: unused padding that completes the row */
        unsigned int padx[7];
    } grf6;
};
445
/*
 * File-scope parameter blocks shared by all modules: the initialize and
 * set_block_parameter callbacks fill them in, and the upload/walker code
 * copies them to the hardware buffers.
 */
static struct pp_static_parameter pp_static_parameter;
static struct pp_inline_parameter pp_inline_parameter;
448
449 static void
450 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
451 {
452     switch (tiling) {
453     case I915_TILING_NONE:
454         ss->ss3.tiled_surface = 0;
455         ss->ss3.tile_walk = 0;
456         break;
457     case I915_TILING_X:
458         ss->ss3.tiled_surface = 1;
459         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
460         break;
461     case I915_TILING_Y:
462         ss->ss3.tiled_surface = 1;
463         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
464         break;
465     }
466 }
467
468 static void
469 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
470 {
471     switch (tiling) {
472     case I915_TILING_NONE:
473         ss->ss2.tiled_surface = 0;
474         ss->ss2.tile_walk = 0;
475         break;
476     case I915_TILING_X:
477         ss->ss2.tiled_surface = 1;
478         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
479         break;
480     case I915_TILING_Y:
481         ss->ss2.tiled_surface = 1;
482         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
483         break;
484     }
485 }
486
/*
 * Placeholder step of the state setup sequence; it does nothing.  In this
 * file the surface states are filled in by the module initialize
 * callbacks instead.
 */
static void
ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
{
    (void)pp_context;
}
492
493 static void
494 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
495 {
496     struct i965_interface_descriptor *desc;
497     dri_bo *bo;
498     int pp_index = pp_context->current_pp;
499
500     bo = pp_context->idrt.bo;
501     dri_bo_map(bo, 1);
502     assert(bo->virtual);
503     desc = bo->virtual;
504     memset(desc, 0, sizeof(*desc));
505     desc->desc0.grf_reg_blocks = 10;
506     desc->desc0.kernel_start_pointer = pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
507     desc->desc1.const_urb_entry_read_offset = 0;
508     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
509     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
510     desc->desc2.sampler_count = 0;
511     desc->desc3.binding_table_entry_count = 0;
512     desc->desc3.binding_table_pointer = 
513         pp_context->binding_table.bo->offset >> 5; /*reloc */
514
515     dri_bo_emit_reloc(bo,
516                       I915_GEM_DOMAIN_INSTRUCTION, 0,
517                       desc->desc0.grf_reg_blocks,
518                       offsetof(struct i965_interface_descriptor, desc0),
519                       pp_modules[pp_index].kernel.bo);
520
521     dri_bo_emit_reloc(bo,
522                       I915_GEM_DOMAIN_INSTRUCTION, 0,
523                       desc->desc2.sampler_count << 2,
524                       offsetof(struct i965_interface_descriptor, desc2),
525                       pp_context->sampler_state_table.bo);
526
527     dri_bo_emit_reloc(bo,
528                       I915_GEM_DOMAIN_INSTRUCTION, 0,
529                       desc->desc3.binding_table_entry_count,
530                       offsetof(struct i965_interface_descriptor, desc3),
531                       pp_context->binding_table.bo);
532
533     dri_bo_unmap(bo);
534     pp_context->idrt.num_interface_descriptors++;
535 }
536
537 static void
538 ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
539 {
540     unsigned int *binding_table;
541     dri_bo *bo = pp_context->binding_table.bo;
542     int i;
543
544     dri_bo_map(bo, 1);
545     assert(bo->virtual);
546     binding_table = bo->virtual;
547     memset(binding_table, 0, bo->size);
548
549     for (i = 0; i < MAX_PP_SURFACES; i++) {
550         if (pp_context->surfaces[i].ss_bo) {
551             assert(pp_context->surfaces[i].s_bo);
552
553             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
554             dri_bo_emit_reloc(bo,
555                               I915_GEM_DOMAIN_INSTRUCTION, 0,
556                               0,
557                               i * sizeof(*binding_table),
558                               pp_context->surfaces[i].ss_bo);
559         }
560     
561     }
562
563     dri_bo_unmap(bo);
564 }
565
566 static void
567 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
568 {
569     struct i965_vfe_state *vfe_state;
570     dri_bo *bo;
571
572     bo = pp_context->vfe_state.bo;
573     dri_bo_map(bo, 1);
574     assert(bo->virtual);
575     vfe_state = bo->virtual;
576     memset(vfe_state, 0, sizeof(*vfe_state));
577     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
578     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
579     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
580     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
581     vfe_state->vfe1.children_present = 0;
582     vfe_state->vfe2.interface_descriptor_base = 
583         pp_context->idrt.bo->offset >> 4; /* reloc */
584     dri_bo_emit_reloc(bo,
585                       I915_GEM_DOMAIN_INSTRUCTION, 0,
586                       0,
587                       offsetof(struct i965_vfe_state, vfe2),
588                       pp_context->idrt.bo);
589     dri_bo_unmap(bo);
590 }
591
592 static void
593 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
594 {
595     unsigned char *constant_buffer;
596
597     assert(sizeof(pp_static_parameter) == 128);
598     dri_bo_map(pp_context->curbe.bo, 1);
599     assert(pp_context->curbe.bo->virtual);
600     constant_buffer = pp_context->curbe.bo->virtual;
601     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
602     dri_bo_unmap(pp_context->curbe.bo);
603 }
604
605 static void
606 ironlake_pp_states_setup(VADriverContextP ctx)
607 {
608     struct i965_driver_data *i965 = i965_driver_data(ctx);
609     struct i965_post_processing_context *pp_context = i965->pp_context;
610
611     ironlake_pp_surface_state(pp_context);
612     ironlake_pp_binding_table(pp_context);
613     ironlake_pp_interface_descriptor_table(pp_context);
614     ironlake_pp_vfe_state(pp_context);
615     ironlake_pp_upload_constants(pp_context);
616 }
617
/*
 * Emit a one-dword batch containing CMD_PIPELINE_SELECT combined with
 * PIPELINE_SELECT_MEDIA.
 */
static void
ironlake_pp_pipeline_select(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 1);
    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(ctx);
}
625
626 static void
627 ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
628 {
629     unsigned int vfe_fence, cs_fence;
630
631     vfe_fence = pp_context->urb.cs_start;
632     cs_fence = pp_context->urb.size;
633
634     BEGIN_BATCH(ctx, 3);
635     OUT_BATCH(ctx, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
636     OUT_BATCH(ctx, 0);
637     OUT_BATCH(ctx, 
638               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
639               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
640     ADVANCE_BATCH(ctx);
641 }
642
643 static void
644 ironlake_pp_state_base_address(VADriverContextP ctx)
645 {
646     BEGIN_BATCH(ctx, 8);
647     OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
648     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
649     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
650     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
651     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
652     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
653     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
654     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
655     ADVANCE_BATCH(ctx);
656 }
657
/*
 * Emit the three-dword media state pointers command: the header, a zero
 * dword, and a relocated pointer to the VFE state buffer.
 */
static void
ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    BEGIN_BATCH(ctx, 3);
    OUT_BATCH(ctx, CMD_MEDIA_STATE_POINTERS | 1);
    OUT_BATCH(ctx, 0);
    OUT_RELOC(ctx, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(ctx);
}
667
/*
 * Emit the CS URB state command.  The second dword packs the entry
 * allocation size (stored as size - 1, starting at bit 4) and the number
 * of entries (starting at bit 0), both read from pp_context->urb.
 */
static void 
ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_CS_URB_STATE | 0);
    OUT_BATCH(ctx,
              ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
              (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
    ADVANCE_BATCH(ctx);
}
678
/*
 * Emit the two-dword constant buffer command pointing at the CURBE
 * buffer.
 *
 * NOTE(review): (1 << 8) is presumably the "buffer valid" bit and (2 - 2)
 * the length field (total dwords minus two) -- confirm against the
 * hardware documentation.
 */
static void
ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    /*
     * The last argument (size_cs_entry - 1) appears to be the relocation
     * delta, i.e. the buffer length is carried in the low bits of the
     * address dword -- TODO confirm.
     */
    OUT_RELOC(ctx, pp_context->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              pp_context->urb.size_cs_entry - 1);
    ADVANCE_BATCH(ctx);    
}
689
690 static void
691 ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
692 {
693     int x, x_steps, y, y_steps;
694
695     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
696     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
697
698     for (y = 0; y < y_steps; y++) {
699         for (x = 0; x < x_steps; x++) {
700             if (!pp_context->pp_set_block_parameter(&pp_context->private_context, x, y)) {
701                 BEGIN_BATCH(ctx, 20);
702                 OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 18);
703                 OUT_BATCH(ctx, 0);
704                 OUT_BATCH(ctx, 0); /* no indirect data */
705                 OUT_BATCH(ctx, 0);
706
707                 /* inline data grf 5-6 */
708                 assert(sizeof(pp_inline_parameter) == 64);
709                 intel_batchbuffer_data(ctx, &pp_inline_parameter, sizeof(pp_inline_parameter));
710
711                 ADVANCE_BATCH(ctx);
712             }
713         }
714     }
715 }
716
717 static void
718 ironlake_pp_pipeline_setup(VADriverContextP ctx)
719 {
720     struct i965_driver_data *i965 = i965_driver_data(ctx);
721     struct i965_post_processing_context *pp_context = i965->pp_context;
722
723     intel_batchbuffer_start_atomic(ctx, 0x1000);
724     intel_batchbuffer_emit_mi_flush(ctx);
725     ironlake_pp_pipeline_select(ctx);
726     ironlake_pp_state_base_address(ctx);
727     ironlake_pp_state_pointers(ctx, pp_context);
728     ironlake_pp_urb_layout(ctx, pp_context);
729     ironlake_pp_cs_urb_layout(ctx, pp_context);
730     ironlake_pp_constant_buffer(ctx, pp_context);
731     ironlake_pp_object_walker(ctx, pp_context);
732     intel_batchbuffer_end_atomic(ctx);
733 }
734
/* NULL module: the block grid is always one column wide. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
740
/* NULL module: the block grid is always one row high. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
746
/*
 * NULL module: nothing to configure per block.  Always returns 0, which
 * the object walker treats as "emit this block".
 */
static int
pp_null_set_block_parameter(void *private_context, int x, int y)
{
    (void)private_context;
    (void)x;
    (void)y;

    return 0;
}
752
753 static void
754 pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
755                    unsigned short srcw, unsigned short srch,
756                    unsigned short destw, unsigned short desth)
757 {
758     struct i965_driver_data *i965 = i965_driver_data(ctx);
759     struct i965_post_processing_context *pp_context = i965->pp_context;
760     struct object_surface *obj_surface;
761
762     /* surface */
763     obj_surface = SURFACE(surface);
764     dri_bo_unreference(obj_surface->pp_out_bo);
765     obj_surface->pp_out_bo = obj_surface->bo;
766     dri_bo_reference(obj_surface->pp_out_bo);
767     assert(obj_surface->pp_out_bo);
768     obj_surface->pp_out_width = obj_surface->width;
769     obj_surface->pp_out_height = obj_surface->height;
770     obj_surface->orig_pp_out_width = obj_surface->orig_width;
771     obj_surface->orig_pp_out_height = obj_surface->orig_height;
772
773     /* private function & data */
774     pp_context->pp_x_steps = pp_null_x_steps;
775     pp_context->pp_y_steps = pp_null_y_steps;
776     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
777 }
778
/* Load/save module: the block grid is always one column wide. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
784
785 static int
786 pp_load_save_y_steps(void *private_context)
787 {
788     struct pp_load_save_context *pp_load_save_context = private_context;
789
790     return pp_load_save_context->dest_h / 8;
791 }
792
793 static int
794 pp_load_save_set_block_parameter(void *private_context, int x, int y)
795 {
796     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
797     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
798     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
799     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
800
801     return 0;
802 }
803
804 static void
805 pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
806                              unsigned short srcw, unsigned short srch,
807                              unsigned short destw, unsigned short desth)
808 {
809     struct i965_driver_data *i965 = i965_driver_data(ctx);
810     struct i965_post_processing_context *pp_context = i965->pp_context;
811     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
812     struct object_surface *obj_surface;
813     struct i965_surface_state *ss;
814     dri_bo *bo;
815     int index, w, h;
816     int orig_w, orig_h;
817     unsigned int tiling, swizzle;
818
819     /* surface */
820     obj_surface = SURFACE(surface);
821     orig_w = obj_surface->orig_width;
822     orig_h = obj_surface->orig_height;
823     w = obj_surface->width;
824     h = obj_surface->height;
825
826     dri_bo_unreference(obj_surface->pp_out_bo);
827     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
828                                           "intermediate surface",
829                                           SIZE_YUV420(w, h),
830                                           4096);
831     assert(obj_surface->pp_out_bo);
832     obj_surface->pp_out_width = obj_surface->width;
833     obj_surface->pp_out_height = obj_surface->height;
834     obj_surface->orig_pp_out_width = obj_surface->orig_width;
835     obj_surface->orig_pp_out_height = obj_surface->orig_height;
836
837     /* source Y surface index 1 */
838     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
839
840     index = 1;
841     pp_context->surfaces[index].s_bo = obj_surface->bo;
842     dri_bo_reference(pp_context->surfaces[index].s_bo);
843     bo = dri_bo_alloc(i965->intel.bufmgr, 
844                       "surface state", 
845                       sizeof(struct i965_surface_state), 
846                       4096);
847     assert(bo);
848     pp_context->surfaces[index].ss_bo = bo;
849     dri_bo_map(bo, True);
850     assert(bo->virtual);
851     ss = bo->virtual;
852     memset(ss, 0, sizeof(*ss));
853     ss->ss0.surface_type = I965_SURFACE_2D;
854     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
855     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
856     ss->ss2.width = orig_w / 4 - 1;
857     ss->ss2.height = orig_h - 1;
858     ss->ss3.pitch = w - 1;
859     pp_set_surface_tiling(ss, tiling);
860     dri_bo_emit_reloc(bo,
861                       I915_GEM_DOMAIN_RENDER, 
862                       0,
863                       0,
864                       offsetof(struct i965_surface_state, ss1),
865                       pp_context->surfaces[index].s_bo);
866     dri_bo_unmap(bo);
867
868     /* source UV surface index 2 */
869     index = 2;
870     pp_context->surfaces[index].s_bo = obj_surface->bo;
871     dri_bo_reference(pp_context->surfaces[index].s_bo);
872     bo = dri_bo_alloc(i965->intel.bufmgr, 
873                       "surface state", 
874                       sizeof(struct i965_surface_state), 
875                       4096);
876     assert(bo);
877     pp_context->surfaces[index].ss_bo = bo;
878     dri_bo_map(bo, True);
879     assert(bo->virtual);
880     ss = bo->virtual;
881     memset(ss, 0, sizeof(*ss));
882     ss->ss0.surface_type = I965_SURFACE_2D;
883     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
884     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
885     ss->ss2.width = orig_w / 4 - 1;
886     ss->ss2.height = orig_h / 2 - 1;
887     ss->ss3.pitch = w - 1;
888     pp_set_surface_tiling(ss, tiling);
889     dri_bo_emit_reloc(bo,
890                       I915_GEM_DOMAIN_RENDER, 
891                       0,
892                       w * h,
893                       offsetof(struct i965_surface_state, ss1),
894                       pp_context->surfaces[index].s_bo);
895     dri_bo_unmap(bo);
896
897     /* destination Y surface index 7 */
898     index = 7;
899     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
900     dri_bo_reference(pp_context->surfaces[index].s_bo);
901     bo = dri_bo_alloc(i965->intel.bufmgr, 
902                       "surface state", 
903                       sizeof(struct i965_surface_state), 
904                       4096);
905     assert(bo);
906     pp_context->surfaces[index].ss_bo = bo;
907     dri_bo_map(bo, True);
908     assert(bo->virtual);
909     ss = bo->virtual;
910     memset(ss, 0, sizeof(*ss));
911     ss->ss0.surface_type = I965_SURFACE_2D;
912     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
913     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
914     ss->ss2.width = orig_w / 4 - 1;
915     ss->ss2.height = orig_h - 1;
916     ss->ss3.pitch = w - 1;
917     dri_bo_emit_reloc(bo,
918                       I915_GEM_DOMAIN_RENDER, 
919                       I915_GEM_DOMAIN_RENDER,
920                       0,
921                       offsetof(struct i965_surface_state, ss1),
922                       pp_context->surfaces[index].s_bo);
923     dri_bo_unmap(bo);
924
925     /* destination UV surface index 8 */
926     index = 8;
927     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
928     dri_bo_reference(pp_context->surfaces[index].s_bo);
929     bo = dri_bo_alloc(i965->intel.bufmgr, 
930                       "surface state", 
931                       sizeof(struct i965_surface_state), 
932                       4096);
933     assert(bo);
934     pp_context->surfaces[index].ss_bo = bo;
935     dri_bo_map(bo, True);
936     assert(bo->virtual);
937     ss = bo->virtual;
938     memset(ss, 0, sizeof(*ss));
939     ss->ss0.surface_type = I965_SURFACE_2D;
940     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
941     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
942     ss->ss2.width = orig_w / 4 - 1;
943     ss->ss2.height = orig_h / 2 - 1;
944     ss->ss3.pitch = w - 1;
945     dri_bo_emit_reloc(bo,
946                       I915_GEM_DOMAIN_RENDER, 
947                       I915_GEM_DOMAIN_RENDER,
948                       w * h,
949                       offsetof(struct i965_surface_state, ss1),
950                       pp_context->surfaces[index].s_bo);
951     dri_bo_unmap(bo);
952
953     /* private function & data */
954     pp_context->pp_x_steps = pp_load_save_x_steps;
955     pp_context->pp_y_steps = pp_load_save_y_steps;
956     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
957     pp_load_save_context->dest_h = h;
958     pp_load_save_context->dest_w = w;
959
960     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
961     pp_inline_parameter.grf5.number_blocks = w / 16;
962 }
963
/*
 * Horizontal step count for the scaling module.
 *
 * Always 1; the private context is not consulted.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    const int x_steps = 1;

    (void)private_context;

    return x_steps;
}
969
970 static int
971 pp_scaling_y_steps(void *private_context)
972 {
973     struct pp_scaling_context *pp_scaling_context = private_context;
974
975     return pp_scaling_context->dest_h / 8;
976 }
977
978 static int
979 pp_scaling_set_block_parameter(void *private_context, int x, int y)
980 {
981     float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
982     float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
983
984     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16;
985     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
986     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
987     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
988     
989     return 0;
990 }
991
992 static void
993 pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
994                            unsigned short srcw, unsigned short srch,
995                            unsigned short destw, unsigned short desth)
996 {
997     struct i965_driver_data *i965 = i965_driver_data(ctx);
998     struct i965_post_processing_context *pp_context = i965->pp_context;
999     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1000     struct object_surface *obj_surface;
1001     struct i965_sampler_state *sampler_state;
1002     struct i965_surface_state *ss;
1003     dri_bo *bo;
1004     int index;
1005     int w, h;
1006     int orig_w, orig_h;
1007     int pp_out_w, pp_out_h;
1008     int orig_pp_out_w, orig_pp_out_h;
1009     unsigned int tiling, swizzle;
1010
1011     /* surface */
1012     obj_surface = SURFACE(surface);
1013     orig_w = obj_surface->orig_width;
1014     orig_h = obj_surface->orig_height;
1015     w = obj_surface->width;
1016     h = obj_surface->height;
1017
1018     orig_pp_out_w = destw;
1019     orig_pp_out_h = desth;
1020     pp_out_w = ALIGN(orig_pp_out_w, 16);
1021     pp_out_h = ALIGN(orig_pp_out_h, 16);
1022     dri_bo_unreference(obj_surface->pp_out_bo);
1023     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1024                                           "intermediate surface",
1025                                           SIZE_YUV420(pp_out_w, pp_out_h),
1026                                           4096);
1027     assert(obj_surface->pp_out_bo);
1028     obj_surface->orig_pp_out_width = orig_pp_out_w;
1029     obj_surface->orig_pp_out_height = orig_pp_out_h;
1030     obj_surface->pp_out_width = pp_out_w;
1031     obj_surface->pp_out_height = pp_out_h;
1032
1033     /* source Y surface index 1 */
1034     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1035
1036     index = 1;
1037     pp_context->surfaces[index].s_bo = obj_surface->bo;
1038     dri_bo_reference(pp_context->surfaces[index].s_bo);
1039     bo = dri_bo_alloc(i965->intel.bufmgr, 
1040                       "surface state", 
1041                       sizeof(struct i965_surface_state), 
1042                       4096);
1043     assert(bo);
1044     pp_context->surfaces[index].ss_bo = bo;
1045     dri_bo_map(bo, True);
1046     assert(bo->virtual);
1047     ss = bo->virtual;
1048     memset(ss, 0, sizeof(*ss));
1049     ss->ss0.surface_type = I965_SURFACE_2D;
1050     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1051     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1052     ss->ss2.width = orig_w - 1;
1053     ss->ss2.height = orig_h - 1;
1054     ss->ss3.pitch = w - 1;
1055     pp_set_surface_tiling(ss, tiling);
1056     dri_bo_emit_reloc(bo,
1057                       I915_GEM_DOMAIN_RENDER, 
1058                       0,
1059                       0,
1060                       offsetof(struct i965_surface_state, ss1),
1061                       pp_context->surfaces[index].s_bo);
1062     dri_bo_unmap(bo);
1063
1064     /* source UV surface index 2 */
1065     index = 2;
1066     pp_context->surfaces[index].s_bo = obj_surface->bo;
1067     dri_bo_reference(pp_context->surfaces[index].s_bo);
1068     bo = dri_bo_alloc(i965->intel.bufmgr, 
1069                       "surface state", 
1070                       sizeof(struct i965_surface_state), 
1071                       4096);
1072     assert(bo);
1073     pp_context->surfaces[index].ss_bo = bo;
1074     dri_bo_map(bo, True);
1075     assert(bo->virtual);
1076     ss = bo->virtual;
1077     memset(ss, 0, sizeof(*ss));
1078     ss->ss0.surface_type = I965_SURFACE_2D;
1079     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1080     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1081     ss->ss2.width = orig_w / 2 - 1;
1082     ss->ss2.height = orig_h / 2 - 1;
1083     ss->ss3.pitch = w - 1;
1084     pp_set_surface_tiling(ss, tiling);
1085     dri_bo_emit_reloc(bo,
1086                       I915_GEM_DOMAIN_RENDER, 
1087                       0,
1088                       w * h,
1089                       offsetof(struct i965_surface_state, ss1),
1090                       pp_context->surfaces[index].s_bo);
1091     dri_bo_unmap(bo);
1092
1093     /* destination Y surface index 7 */
1094     index = 7;
1095     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1096     dri_bo_reference(pp_context->surfaces[index].s_bo);
1097     bo = dri_bo_alloc(i965->intel.bufmgr, 
1098                       "surface state", 
1099                       sizeof(struct i965_surface_state), 
1100                       4096);
1101     assert(bo);
1102     pp_context->surfaces[index].ss_bo = bo;
1103     dri_bo_map(bo, True);
1104     assert(bo->virtual);
1105     ss = bo->virtual;
1106     memset(ss, 0, sizeof(*ss));
1107     ss->ss0.surface_type = I965_SURFACE_2D;
1108     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1109     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1110     ss->ss2.width = pp_out_w / 4 - 1;
1111     ss->ss2.height = pp_out_h - 1;
1112     ss->ss3.pitch = pp_out_w - 1;
1113     dri_bo_emit_reloc(bo,
1114                       I915_GEM_DOMAIN_RENDER, 
1115                       I915_GEM_DOMAIN_RENDER,
1116                       0,
1117                       offsetof(struct i965_surface_state, ss1),
1118                       pp_context->surfaces[index].s_bo);
1119     dri_bo_unmap(bo);
1120
1121     /* destination UV surface index 8 */
1122     index = 8;
1123     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1124     dri_bo_reference(pp_context->surfaces[index].s_bo);
1125     bo = dri_bo_alloc(i965->intel.bufmgr, 
1126                       "surface state", 
1127                       sizeof(struct i965_surface_state), 
1128                       4096);
1129     assert(bo);
1130     pp_context->surfaces[index].ss_bo = bo;
1131     dri_bo_map(bo, True);
1132     assert(bo->virtual);
1133     ss = bo->virtual;
1134     memset(ss, 0, sizeof(*ss));
1135     ss->ss0.surface_type = I965_SURFACE_2D;
1136     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1137     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
1138     ss->ss2.width = pp_out_w / 4 - 1;
1139     ss->ss2.height = pp_out_h / 2 - 1;
1140     ss->ss3.pitch = pp_out_w - 1;
1141     dri_bo_emit_reloc(bo,
1142                       I915_GEM_DOMAIN_RENDER, 
1143                       I915_GEM_DOMAIN_RENDER,
1144                       pp_out_w * pp_out_h,
1145                       offsetof(struct i965_surface_state, ss1),
1146                       pp_context->surfaces[index].s_bo);
1147     dri_bo_unmap(bo);
1148
1149     /* sampler state */
1150     dri_bo_map(pp_context->sampler_state_table.bo, True);
1151     assert(pp_context->sampler_state_table.bo->virtual);
1152     sampler_state = pp_context->sampler_state_table.bo->virtual;
1153
1154     /* SIMD16 Y index 1 */
1155     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1156     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1157     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1158     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1159     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1160
1161     /* SIMD16 UV index 2 */
1162     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1163     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1164     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1165     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1166     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1167
1168     dri_bo_unmap(pp_context->sampler_state_table.bo);
1169
1170     /* private function & data */
1171     pp_context->pp_x_steps = pp_scaling_x_steps;
1172     pp_context->pp_y_steps = pp_scaling_y_steps;
1173     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1174
1175     pp_scaling_context->dest_w = pp_out_w;
1176     pp_scaling_context->dest_h = pp_out_h;
1177
1178     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
1179     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
1180     pp_inline_parameter.grf5.block_count_x = pp_out_w / 16;   /* 1 x N */
1181     pp_inline_parameter.grf5.number_blocks = pp_out_w / 16;
1182     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1183     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1184 }
1185
1186 static int
1187 pp_avs_x_steps(void *private_context)
1188 {
1189     struct pp_avs_context *pp_avs_context = private_context;
1190
1191     return pp_avs_context->dest_w / 16;
1192 }
1193
/*
 * Vertical step count for the AVS module.
 *
 * Always 1; the private context is not consulted.
 */
static int
pp_avs_y_steps(void *private_context)
{
    const int y_steps = 1;

    (void)private_context;

    return y_steps;
}
1199
1200 static int
1201 pp_avs_set_block_parameter(void *private_context, int x, int y)
1202 {
1203     struct pp_avs_context *pp_avs_context = private_context;
1204     float src_x_steping, src_y_steping, video_step_delta;
1205     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1206
1207     if (tmp_w >= pp_avs_context->dest_w) {
1208         pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1209         pp_inline_parameter.grf6.video_step_delta = 0;
1210         
1211         if (x == 0) {
1212             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2;
1213         } else {
1214             src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1215             video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1216             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1217                 16 * 15 * video_step_delta / 2;
1218         }
1219     } else {
1220         int n0, n1, n2, nls_left, nls_right;
1221         int factor_a = 5, factor_b = 4;
1222         float f;
1223
1224         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1225         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1226         n2 = tmp_w / (16 * factor_a);
1227         nls_left = n0 + n2;
1228         nls_right = n1 + n2;
1229         f = (float) n2 * 16 / tmp_w;
1230         
1231         if (n0 < 5) {
1232             pp_inline_parameter.grf6.video_step_delta = 0.0;
1233
1234             if (x == 0) {
1235                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1236                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1237             } else {
1238                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1239                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1240                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1241                     16 * 15 * video_step_delta / 2;
1242             }
1243         } else {
1244             if (x < nls_left) {
1245                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1246                 float a = f / (nls_left * 16 * factor_b);
1247                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1248                 
1249                 pp_inline_parameter.grf6.video_step_delta = b;
1250
1251                 if (x == 0) {
1252                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1253                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
1254                 } else {
1255                     src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1256                     video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1257                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1258                         16 * 15 * video_step_delta / 2;
1259                     pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
1260                 }
1261             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1262                 /* scale the center linearly */
1263                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1264                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1265                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1266                     16 * 15 * video_step_delta / 2;
1267                 pp_inline_parameter.grf6.video_step_delta = 0.0;
1268                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1269             } else {
1270                 float a = f / (nls_right * 16 * factor_b);
1271                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1272
1273                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1274                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1275                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1276                     16 * 15 * video_step_delta / 2;
1277                 pp_inline_parameter.grf6.video_step_delta = -b;
1278
1279                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1280                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1281                 else
1282                     pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
1283             }
1284         }
1285     }
1286
1287     src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
1288     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
1289     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1290     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
1291
1292     return 0;
1293 }
1294
1295 static void
1296 pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1297                        unsigned short srcw, unsigned short srch,
1298                        unsigned short destw, unsigned short desth)
1299 {
1300     struct i965_driver_data *i965 = i965_driver_data(ctx);
1301     struct i965_post_processing_context *pp_context = i965->pp_context;
1302     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1303     struct object_surface *obj_surface;
1304     struct i965_surface_state *ss;
1305     struct i965_sampler_8x8 *sampler_8x8;
1306     struct i965_sampler_8x8_state *sampler_8x8_state;
1307     struct i965_surface_state2 *ss_8x8;
1308     dri_bo *bo, *src_bo;
1309     int index;
1310     int w, h;
1311     int orig_w, orig_h;
1312     int pp_out_w, pp_out_h;
1313     int orig_pp_out_w, orig_pp_out_h;
1314     unsigned int tiling, swizzle;
1315
1316     /* surface */
1317     obj_surface = SURFACE(surface);
1318     
1319     if (input == 1) {
1320         orig_w = obj_surface->orig_pp_out_width;
1321         orig_h = obj_surface->orig_pp_out_height;
1322         w = obj_surface->pp_out_width;
1323         h = obj_surface->pp_out_height;
1324         src_bo = obj_surface->pp_out_bo;
1325     } else {
1326         orig_w = obj_surface->orig_width;
1327         orig_h = obj_surface->orig_height;
1328         w = obj_surface->width;
1329         h = obj_surface->height;
1330         src_bo = obj_surface->bo;
1331     }
1332
1333     assert(src_bo);
1334     dri_bo_get_tiling(src_bo, &tiling, &swizzle);
1335
1336     /* source Y surface index 1 */
1337     index = 1;
1338     pp_context->surfaces[index].s_bo = src_bo;
1339     dri_bo_reference(pp_context->surfaces[index].s_bo);
1340     bo = dri_bo_alloc(i965->intel.bufmgr, 
1341                       "Y surface state for sample_8x8", 
1342                       sizeof(struct i965_surface_state2), 
1343                       4096);
1344     assert(bo);
1345     pp_context->surfaces[index].ss_bo = bo;
1346     dri_bo_map(bo, True);
1347     assert(bo->virtual);
1348     ss_8x8 = bo->virtual;
1349     memset(ss_8x8, 0, sizeof(*ss_8x8));
1350     ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1351     ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1352     ss_8x8->ss1.width = orig_w - 1;
1353     ss_8x8->ss1.height = orig_h - 1;
1354     ss_8x8->ss2.half_pitch_for_chroma = 0;
1355     ss_8x8->ss2.pitch = w - 1;
1356     ss_8x8->ss2.interleave_chroma = 0;
1357     ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM;
1358     ss_8x8->ss3.x_offset_for_cb = 0;
1359     ss_8x8->ss3.y_offset_for_cb = 0;
1360     pp_set_surface2_tiling(ss_8x8, tiling);
1361     dri_bo_emit_reloc(bo,
1362                       I915_GEM_DOMAIN_RENDER, 
1363                       0,
1364                       0,
1365                       offsetof(struct i965_surface_state2, ss0),
1366                       pp_context->surfaces[index].s_bo);
1367     dri_bo_unmap(bo);
1368
1369     /* source UV surface index 2 */
1370     index = 2;
1371     pp_context->surfaces[index].s_bo = src_bo;
1372     dri_bo_reference(pp_context->surfaces[index].s_bo);
1373     bo = dri_bo_alloc(i965->intel.bufmgr, 
1374                       "UV surface state for sample_8x8", 
1375                       sizeof(struct i965_surface_state2), 
1376                       4096);
1377     assert(bo);
1378     pp_context->surfaces[index].ss_bo = bo;
1379     dri_bo_map(bo, True);
1380     assert(bo->virtual);
1381     ss_8x8 = bo->virtual;
1382     memset(ss_8x8, 0, sizeof(*ss_8x8));
1383     ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + w * h;
1384     ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1385     ss_8x8->ss1.width = orig_w - 1;
1386     ss_8x8->ss1.height = orig_h - 1;
1387     ss_8x8->ss2.half_pitch_for_chroma = 0;
1388     ss_8x8->ss2.pitch = w - 1;
1389     ss_8x8->ss2.interleave_chroma = 1;
1390     ss_8x8->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1391     ss_8x8->ss3.x_offset_for_cb = 0;
1392     ss_8x8->ss3.y_offset_for_cb = 0;
1393     pp_set_surface2_tiling(ss_8x8, tiling);
1394     dri_bo_emit_reloc(bo,
1395                       I915_GEM_DOMAIN_RENDER, 
1396                       0,
1397                       w * h,
1398                       offsetof(struct i965_surface_state2, ss0),
1399                       pp_context->surfaces[index].s_bo);
1400     dri_bo_unmap(bo);
1401
1402     orig_pp_out_w = destw;
1403     orig_pp_out_h = desth;
1404     pp_out_w = ALIGN(orig_pp_out_w, 16);
1405     pp_out_h = ALIGN(orig_pp_out_h, 16);
1406     dri_bo_unreference(obj_surface->pp_out_bo);
1407     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1408                                           "intermediate surface",
1409                                           SIZE_YUV420(pp_out_w, pp_out_h),
1410                                           4096);
1411     assert(obj_surface->pp_out_bo);
1412     obj_surface->orig_pp_out_width = orig_pp_out_w;
1413     obj_surface->orig_pp_out_height = orig_pp_out_h;
1414     obj_surface->pp_out_width = pp_out_w;
1415     obj_surface->pp_out_height = pp_out_h;
1416
1417     /* destination Y surface index 7 */
1418     index = 7;
1419     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1420     dri_bo_reference(pp_context->surfaces[index].s_bo);
1421     bo = dri_bo_alloc(i965->intel.bufmgr, 
1422                       "surface state", 
1423                       sizeof(struct i965_surface_state), 
1424                       4096);
1425     assert(bo);
1426     pp_context->surfaces[index].ss_bo = bo;
1427     dri_bo_map(bo, True);
1428     assert(bo->virtual);
1429     ss = bo->virtual;
1430     memset(ss, 0, sizeof(*ss));
1431     ss->ss0.surface_type = I965_SURFACE_2D;
1432     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1433     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1434     ss->ss2.width = pp_out_w / 4 - 1;
1435     ss->ss2.height = pp_out_h - 1;
1436     ss->ss3.pitch = pp_out_w - 1;
1437     dri_bo_emit_reloc(bo,
1438                       I915_GEM_DOMAIN_RENDER, 
1439                       I915_GEM_DOMAIN_RENDER,
1440                       0,
1441                       offsetof(struct i965_surface_state, ss1),
1442                       pp_context->surfaces[index].s_bo);
1443     dri_bo_unmap(bo);
1444
1445     /* destination UV surface index 8 */
1446     index = 8;
1447     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1448     dri_bo_reference(pp_context->surfaces[index].s_bo);
1449     bo = dri_bo_alloc(i965->intel.bufmgr, 
1450                       "surface state", 
1451                       sizeof(struct i965_surface_state), 
1452                       4096);
1453     assert(bo);
1454     pp_context->surfaces[index].ss_bo = bo;
1455     dri_bo_map(bo, True);
1456     assert(bo->virtual);
1457     ss = bo->virtual;
1458     memset(ss, 0, sizeof(*ss));
1459     ss->ss0.surface_type = I965_SURFACE_2D;
1460     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1461     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
1462     ss->ss2.width = pp_out_w / 4 - 1;
1463     ss->ss2.height = pp_out_h / 2 - 1;
1464     ss->ss3.pitch = pp_out_w - 1;
1465     dri_bo_emit_reloc(bo,
1466                       I915_GEM_DOMAIN_RENDER, 
1467                       I915_GEM_DOMAIN_RENDER,
1468                       pp_out_w * pp_out_h,
1469                       offsetof(struct i965_surface_state, ss1),
1470                       pp_context->surfaces[index].s_bo);
1471     dri_bo_unmap(bo);
1472     
1473     /* sampler 8x8 state */
1474     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1475     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1476     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1477     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1478     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1479     sampler_8x8_state->dw136.default_sharpness_level = 0;
1480     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1481     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1482     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1483     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1484
1485     /* sampler 8x8 */
1486     dri_bo_map(pp_context->sampler_state_table.bo, True);
1487     assert(pp_context->sampler_state_table.bo->virtual);
1488     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1489     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1490
1491     /* sample_8x8 Y index 1 */
1492     index = 1;
1493     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1494     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1495     sampler_8x8[index].dw0.ief_bypass = 0;
1496     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1497     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1498     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1499     sampler_8x8[index].dw2.global_noise_estimation = 22;
1500     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1501     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1502     sampler_8x8[index].dw3.strong_edge_weight = 7;
1503     sampler_8x8[index].dw3.regular_weight = 2;
1504     sampler_8x8[index].dw3.non_edge_weight = 0;
1505     sampler_8x8[index].dw3.gain_factor = 40;
1506     sampler_8x8[index].dw4.steepness_boost = 0;
1507     sampler_8x8[index].dw4.steepness_threshold = 0;
1508     sampler_8x8[index].dw4.mr_boost = 0;
1509     sampler_8x8[index].dw4.mr_threshold = 5;
1510     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1511     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1512     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1513     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1514     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1515     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1516     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1517     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1518     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1519     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1520     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1521     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1522     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1523     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1524     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1525     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1526     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1527     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1528     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1529     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1530     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1531     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1532     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1533     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1534     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1535     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1536     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1537     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1538     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1539     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1540     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1541     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1542     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1543     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1544     sampler_8x8[index].dw13.limiter_boost = 0;
1545     sampler_8x8[index].dw13.minimum_limiter = 10;
1546     sampler_8x8[index].dw13.maximum_limiter = 11;
1547     sampler_8x8[index].dw14.clip_limiter = 130;
1548     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1549                       I915_GEM_DOMAIN_RENDER, 
1550                       0,
1551                       0,
1552                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1553                       pp_context->sampler_state_table.bo_8x8);
1554
1555     dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
1556     assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
1557     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1558     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
1559     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1560     sampler_8x8_state->dw136.default_sharpness_level = 0;
1561     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
1562     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1563     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1564     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
1565
1566     /* sample_8x8 UV index 2 */
1567     index = 2;
1568     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1569     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
1570     sampler_8x8[index].dw0.ief_bypass = 0;
1571     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1572     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1573     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
1574     sampler_8x8[index].dw2.global_noise_estimation = 22;
1575     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1576     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1577     sampler_8x8[index].dw3.strong_edge_weight = 7;
1578     sampler_8x8[index].dw3.regular_weight = 2;
1579     sampler_8x8[index].dw3.non_edge_weight = 0;
1580     sampler_8x8[index].dw3.gain_factor = 40;
1581     sampler_8x8[index].dw4.steepness_boost = 0;
1582     sampler_8x8[index].dw4.steepness_threshold = 0;
1583     sampler_8x8[index].dw4.mr_boost = 0;
1584     sampler_8x8[index].dw4.mr_threshold = 5;
1585     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1586     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1587     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1588     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1589     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1590     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1591     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1592     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1593     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1594     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1595     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1596     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1597     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1598     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1599     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1600     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1601     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1602     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1603     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1604     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1605     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1606     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1607     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1608     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1609     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1610     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1611     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1612     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1613     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1614     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1615     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1616     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1617     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1618     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1619     sampler_8x8[index].dw13.limiter_boost = 0;
1620     sampler_8x8[index].dw13.minimum_limiter = 10;
1621     sampler_8x8[index].dw13.maximum_limiter = 11;
1622     sampler_8x8[index].dw14.clip_limiter = 130;
1623     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1624                       I915_GEM_DOMAIN_RENDER, 
1625                       0,
1626                       0,
1627                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1628                       pp_context->sampler_state_table.bo_8x8_uv);
1629
1630     dri_bo_unmap(pp_context->sampler_state_table.bo);
1631
1632     /* private function & data */
1633     pp_context->pp_x_steps = pp_avs_x_steps;
1634     pp_context->pp_y_steps = pp_avs_y_steps;
1635     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1636
1637     pp_avs_context->dest_w = pp_out_w;
1638     pp_avs_context->dest_h = pp_out_h;
1639     pp_avs_context->src_w = w;
1640     pp_avs_context->src_h = h;
1641
1642     pp_static_parameter.grf4.r4_2.avs.nlas = 1;
1643     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
1644     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
1645     pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
1646     pp_inline_parameter.grf5.number_blocks = pp_out_h / 8;
1647     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1648     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1649     pp_inline_parameter.grf6.video_step_delta = 0.0;
1650 }
1651
/*
 * Horizontal step count for the DNDI module.
 *
 * The private context is not consulted; exactly one step is always
 * reported.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1657
1658 static int
1659 pp_dndi_y_steps(void *private_context)
1660 {
1661     struct pp_dndi_context *pp_dndi_context = private_context;
1662
1663     return pp_dndi_context->dest_h / 4;
1664 }
1665
1666 static int
1667 pp_dndi_set_block_parameter(void *private_context, int x, int y)
1668 {
1669     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1670     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1671
1672     return 0;
1673 }
1674
1675 static 
1676 void pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1677                              unsigned short srcw, unsigned short srch,
1678                              unsigned short destw, unsigned short desth)
1679 {
1680     struct i965_driver_data *i965 = i965_driver_data(ctx);
1681     struct i965_post_processing_context *pp_context = i965->pp_context;
1682     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1683     struct object_surface *obj_surface;
1684     struct i965_surface_state *ss;
1685     struct i965_surface_state2 *ss_dndi;
1686     struct i965_sampler_dndi *sampler_dndi;
1687     dri_bo *bo;
1688     int index;
1689     int w, h;
1690     int orig_w, orig_h;
1691     unsigned int tiling, swizzle;
1692
1693     /* surface */
1694     obj_surface = SURFACE(surface);
1695     orig_w = obj_surface->orig_width;
1696     orig_h = obj_surface->orig_height;
1697     w = obj_surface->width;
1698     h = obj_surface->height;
1699
1700     if (pp_context->stmm.bo == NULL) {
1701         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1702                                            "STMM surface",
1703                                            w * h,
1704                                            4096);
1705         assert(pp_context->stmm.bo);
1706     }
1707
1708     dri_bo_unreference(obj_surface->pp_out_bo);
1709     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1710                                           "intermediate surface",
1711                                           SIZE_YUV420(w, h),
1712                                           4096);
1713     assert(obj_surface->pp_out_bo);
1714     obj_surface->orig_pp_out_width = orig_w;
1715     obj_surface->orig_pp_out_height = orig_h;
1716     obj_surface->pp_out_width = w;
1717     obj_surface->pp_out_height = h;
1718
1719     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1720     /* source UV surface index 2 */
1721     index = 2;
1722     pp_context->surfaces[index].s_bo = obj_surface->bo;
1723     dri_bo_reference(pp_context->surfaces[index].s_bo);
1724     bo = dri_bo_alloc(i965->intel.bufmgr, 
1725                       "surface state", 
1726                       sizeof(struct i965_surface_state), 
1727                       4096);
1728     assert(bo);
1729     pp_context->surfaces[index].ss_bo = bo;
1730     dri_bo_map(bo, True);
1731     assert(bo->virtual);
1732     ss = bo->virtual;
1733     memset(ss, 0, sizeof(*ss));
1734     ss->ss0.surface_type = I965_SURFACE_2D;
1735     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1736     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1737     ss->ss2.width = orig_w / 4 - 1;
1738     ss->ss2.height = orig_h / 2 - 1;
1739     ss->ss3.pitch = w - 1;
1740     pp_set_surface_tiling(ss, tiling);
1741     dri_bo_emit_reloc(bo,
1742                       I915_GEM_DOMAIN_RENDER, 
1743                       0,
1744                       w * h,
1745                       offsetof(struct i965_surface_state, ss1),
1746                       pp_context->surfaces[index].s_bo);
1747     dri_bo_unmap(bo);
1748
1749     /* source YUV surface index 4 */
1750     index = 4;
1751     pp_context->surfaces[index].s_bo = obj_surface->bo;
1752     dri_bo_reference(pp_context->surfaces[index].s_bo);
1753     bo = dri_bo_alloc(i965->intel.bufmgr, 
1754                       "YUV surface state for deinterlace ", 
1755                       sizeof(struct i965_surface_state2), 
1756                       4096);
1757     assert(bo);
1758     pp_context->surfaces[index].ss_bo = bo;
1759     dri_bo_map(bo, True);
1760     assert(bo->virtual);
1761     ss_dndi = bo->virtual;
1762     memset(ss_dndi, 0, sizeof(*ss_dndi));
1763     ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1764     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 0;
1765     ss_dndi->ss1.width = w - 1;
1766     ss_dndi->ss1.height = h - 1;
1767     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1;
1768     ss_dndi->ss2.half_pitch_for_chroma = 0;
1769     ss_dndi->ss2.pitch = w - 1;
1770     ss_dndi->ss2.interleave_chroma = 1;
1771     ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1772     ss_dndi->ss2.half_pitch_for_chroma = 0;
1773     ss_dndi->ss2.tiled_surface = 0;
1774     ss_dndi->ss3.x_offset_for_cb = 0;
1775     ss_dndi->ss3.y_offset_for_cb = h;
1776     pp_set_surface2_tiling(ss_dndi, tiling);
1777     dri_bo_emit_reloc(bo,
1778                       I915_GEM_DOMAIN_RENDER, 
1779                       0,
1780                       0,
1781                       offsetof(struct i965_surface_state2, ss0),
1782                       pp_context->surfaces[index].s_bo);
1783     dri_bo_unmap(bo);
1784
1785     /* source STMM surface index 20 */
1786     index = 20;
1787     pp_context->surfaces[index].s_bo = pp_context->stmm.bo;
1788     dri_bo_reference(pp_context->surfaces[index].s_bo);
1789     bo = dri_bo_alloc(i965->intel.bufmgr, 
1790                       "STMM surface state for deinterlace ", 
1791                       sizeof(struct i965_surface_state2), 
1792                       4096);
1793     assert(bo);
1794     pp_context->surfaces[index].ss_bo = bo;
1795     dri_bo_map(bo, True);
1796     assert(bo->virtual);
1797     ss = bo->virtual;
1798     memset(ss, 0, sizeof(*ss));
1799     ss->ss0.surface_type = I965_SURFACE_2D;
1800     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1801     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1802     ss->ss2.width = w - 1;
1803     ss->ss2.height = h - 1;
1804     ss->ss3.pitch = w - 1;
1805     dri_bo_emit_reloc(bo,
1806                       I915_GEM_DOMAIN_RENDER, 
1807                       I915_GEM_DOMAIN_RENDER,
1808                       0,
1809                       offsetof(struct i965_surface_state, ss1),
1810                       pp_context->surfaces[index].s_bo);
1811     dri_bo_unmap(bo);
1812
1813     /* destination Y surface index 7 */
1814     index = 7;
1815     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1816     dri_bo_reference(pp_context->surfaces[index].s_bo);
1817     bo = dri_bo_alloc(i965->intel.bufmgr, 
1818                       "surface state", 
1819                       sizeof(struct i965_surface_state), 
1820                       4096);
1821     assert(bo);
1822     pp_context->surfaces[index].ss_bo = bo;
1823     dri_bo_map(bo, True);
1824     assert(bo->virtual);
1825     ss = bo->virtual;
1826     memset(ss, 0, sizeof(*ss));
1827     ss->ss0.surface_type = I965_SURFACE_2D;
1828     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1829     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1830     ss->ss2.width = w / 4 - 1;
1831     ss->ss2.height = h - 1;
1832     ss->ss3.pitch = w - 1;
1833     dri_bo_emit_reloc(bo,
1834                       I915_GEM_DOMAIN_RENDER, 
1835                       I915_GEM_DOMAIN_RENDER,
1836                       0,
1837                       offsetof(struct i965_surface_state, ss1),
1838                       pp_context->surfaces[index].s_bo);
1839     dri_bo_unmap(bo);
1840
1841     /* destination UV surface index 8 */
1842     index = 8;
1843     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1844     dri_bo_reference(pp_context->surfaces[index].s_bo);
1845     bo = dri_bo_alloc(i965->intel.bufmgr, 
1846                       "surface state", 
1847                       sizeof(struct i965_surface_state), 
1848                       4096);
1849     assert(bo);
1850     pp_context->surfaces[index].ss_bo = bo;
1851     dri_bo_map(bo, True);
1852     assert(bo->virtual);
1853     ss = bo->virtual;
1854     memset(ss, 0, sizeof(*ss));
1855     ss->ss0.surface_type = I965_SURFACE_2D;
1856     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1857     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1858     ss->ss2.width = w / 4 - 1;
1859     ss->ss2.height = h / 2 - 1;
1860     ss->ss3.pitch = w - 1;
1861     dri_bo_emit_reloc(bo,
1862                       I915_GEM_DOMAIN_RENDER, 
1863                       I915_GEM_DOMAIN_RENDER,
1864                       w * h,
1865                       offsetof(struct i965_surface_state, ss1),
1866                       pp_context->surfaces[index].s_bo);
1867     dri_bo_unmap(bo);
1868
1869     /* sampler dndi */
1870     dri_bo_map(pp_context->sampler_state_table.bo, True);
1871     assert(pp_context->sampler_state_table.bo->virtual);
1872     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1873     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1874
1875     /* sample dndi index 1 */
1876     index = 0;
1877     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1878     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1879     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1880     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1881
1882     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1883     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1884     sampler_dndi[index].dw1.stmm_c2 = 0;
1885     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1886     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1887
1888     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
1889     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1890     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1891     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1892
1893     sampler_dndi[index].dw3.maximum_stmm = 128;
1894     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1895     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1896     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1897     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1898
1899     sampler_dndi[index].dw4.sdi_delta = 8;
1900     sampler_dndi[index].dw4.sdi_threshold = 128;
1901     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1902     sampler_dndi[index].dw4.stmm_shift_up = 0;
1903     sampler_dndi[index].dw4.stmm_shift_down = 0;
1904     sampler_dndi[index].dw4.minimum_stmm = 0;
1905
1906     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1907     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1908     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1909     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1910
1911     sampler_dndi[index].dw6.dn_enable = 1;
1912     sampler_dndi[index].dw6.di_enable = 1;
1913     sampler_dndi[index].dw6.di_partial = 0;
1914     sampler_dndi[index].dw6.dndi_top_first = 1;
1915     sampler_dndi[index].dw6.dndi_stream_id = 1;
1916     sampler_dndi[index].dw6.dndi_first_frame = 1;
1917     sampler_dndi[index].dw6.progressive_dn = 0;
1918     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1919     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1920     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1921
1922     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1923     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1924     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1925     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1926
1927     dri_bo_unmap(pp_context->sampler_state_table.bo);
1928
1929     /* private function & data */
1930     pp_context->pp_x_steps = pp_dndi_x_steps;
1931     pp_context->pp_y_steps = pp_dndi_y_steps;
1932     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1933
1934     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1935     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1936     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1937     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1938
1939     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1940     pp_inline_parameter.grf5.number_blocks = w / 16;
1941     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1942     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1943
1944     pp_dndi_context->dest_w = w;
1945     pp_dndi_context->dest_h = h;
1946 }
1947
1948 static void
1949 ironlake_pp_initialize(VADriverContextP ctx,
1950                        VASurfaceID surface,
1951                        int input,
1952                        short srcx,
1953                        short srcy,
1954                        unsigned short srcw,
1955                        unsigned short srch,
1956                        short destx,
1957                        short desty,
1958                        unsigned short destw,
1959                        unsigned short desth,
1960                        int pp_index)
1961 {
1962     struct i965_driver_data *i965 = i965_driver_data(ctx);
1963     struct i965_post_processing_context *pp_context = i965->pp_context;
1964     struct pp_module *pp_module;
1965     dri_bo *bo;
1966     int i;
1967
1968     dri_bo_unreference(pp_context->curbe.bo);
1969     bo = dri_bo_alloc(i965->intel.bufmgr,
1970                       "constant buffer",
1971                       4096, 
1972                       4096);
1973     assert(bo);
1974     pp_context->curbe.bo = bo;
1975
1976     dri_bo_unreference(pp_context->binding_table.bo);
1977     bo = dri_bo_alloc(i965->intel.bufmgr, 
1978                       "binding table",
1979                       sizeof(unsigned int), 
1980                       4096);
1981     assert(bo);
1982     pp_context->binding_table.bo = bo;
1983
1984     dri_bo_unreference(pp_context->idrt.bo);
1985     bo = dri_bo_alloc(i965->intel.bufmgr, 
1986                       "interface discriptor", 
1987                       sizeof(struct i965_interface_descriptor), 
1988                       4096);
1989     assert(bo);
1990     pp_context->idrt.bo = bo;
1991     pp_context->idrt.num_interface_descriptors = 0;
1992
1993     dri_bo_unreference(pp_context->sampler_state_table.bo);
1994     bo = dri_bo_alloc(i965->intel.bufmgr, 
1995                       "sampler state table", 
1996                       4096,
1997                       4096);
1998     assert(bo);
1999     dri_bo_map(bo, True);
2000     memset(bo->virtual, 0, bo->size);
2001     dri_bo_unmap(bo);
2002     pp_context->sampler_state_table.bo = bo;
2003
2004     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2005     bo = dri_bo_alloc(i965->intel.bufmgr, 
2006                       "sampler 8x8 state ",
2007                       4096,
2008                       4096);
2009     assert(bo);
2010     pp_context->sampler_state_table.bo_8x8 = bo;
2011
2012     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2013     bo = dri_bo_alloc(i965->intel.bufmgr, 
2014                       "sampler 8x8 state ",
2015                       4096,
2016                       4096);
2017     assert(bo);
2018     pp_context->sampler_state_table.bo_8x8_uv = bo;
2019
2020     dri_bo_unreference(pp_context->vfe_state.bo);
2021     bo = dri_bo_alloc(i965->intel.bufmgr, 
2022                       "vfe state", 
2023                       sizeof(struct i965_vfe_state), 
2024                       4096);
2025     assert(bo);
2026     pp_context->vfe_state.bo = bo;
2027     
2028     for (i = 0; i < MAX_PP_SURFACES; i++) {
2029         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2030         pp_context->surfaces[i].ss_bo = NULL;
2031
2032         dri_bo_unreference(pp_context->surfaces[i].s_bo);
2033         pp_context->surfaces[i].s_bo = NULL;
2034     }
2035
2036     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
2037     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
2038     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
2039     assert(pp_modules);
2040     pp_context->current_pp = pp_index;
2041     pp_module = &pp_modules[pp_index];
2042     
2043     if (pp_module->initialize)
2044         pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
2045 }
2046
2047 static void
2048 ironlake_post_processing(VADriverContextP ctx,
2049                          VASurfaceID surface,
2050                          int input,
2051                          short srcx,
2052                          short srcy,
2053                          unsigned short srcw,
2054                          unsigned short srch,
2055                          short destx,
2056                          short desty,
2057                          unsigned short destw,
2058                          unsigned short desth,
2059                          int pp_index)
2060 {
2061     ironlake_pp_initialize(ctx, surface, input,
2062                            srcx, srcy, srcw, srch,
2063                            destx, desty, destw, desth,
2064                            pp_index);
2065     ironlake_pp_states_setup(ctx);
2066     ironlake_pp_pipeline_setup(ctx);
2067 }
2068
2069 static void
2070 gen6_pp_initialize(VADriverContextP ctx,
2071                    VASurfaceID surface,
2072                    int input,
2073                    short srcx,
2074                    short srcy,
2075                    unsigned short srcw,
2076                    unsigned short srch,
2077                    short destx,
2078                    short desty,
2079                    unsigned short destw,
2080                    unsigned short desth,
2081                    int pp_index)
2082 {
2083     struct i965_driver_data *i965 = i965_driver_data(ctx);
2084     struct i965_post_processing_context *pp_context = i965->pp_context;
2085     struct pp_module *pp_module;
2086     dri_bo *bo;
2087     int i;
2088
2089     dri_bo_unreference(pp_context->curbe.bo);
2090     bo = dri_bo_alloc(i965->intel.bufmgr,
2091                       "constant buffer",
2092                       4096, 
2093                       4096);
2094     assert(bo);
2095     pp_context->curbe.bo = bo;
2096
2097     dri_bo_unreference(pp_context->binding_table.bo);
2098     bo = dri_bo_alloc(i965->intel.bufmgr, 
2099                       "binding table",
2100                       sizeof(unsigned int), 
2101                       4096);
2102     assert(bo);
2103     pp_context->binding_table.bo = bo;
2104
2105     dri_bo_unreference(pp_context->idrt.bo);
2106     bo = dri_bo_alloc(i965->intel.bufmgr, 
2107                       "interface discriptor", 
2108                       sizeof(struct gen6_interface_descriptor_data), 
2109                       4096);
2110     assert(bo);
2111     pp_context->idrt.bo = bo;
2112     pp_context->idrt.num_interface_descriptors = 0;
2113
2114     dri_bo_unreference(pp_context->sampler_state_table.bo);
2115     bo = dri_bo_alloc(i965->intel.bufmgr, 
2116                       "sampler state table", 
2117                       4096,
2118                       4096);
2119     assert(bo);
2120     dri_bo_map(bo, True);
2121     memset(bo->virtual, 0, bo->size);
2122     dri_bo_unmap(bo);
2123     pp_context->sampler_state_table.bo = bo;
2124
2125     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2126     bo = dri_bo_alloc(i965->intel.bufmgr, 
2127                       "sampler 8x8 state ",
2128                       4096,
2129                       4096);
2130     assert(bo);
2131     pp_context->sampler_state_table.bo_8x8 = bo;
2132
2133     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2134     bo = dri_bo_alloc(i965->intel.bufmgr, 
2135                       "sampler 8x8 state ",
2136                       4096,
2137                       4096);
2138     assert(bo);
2139     pp_context->sampler_state_table.bo_8x8_uv = bo;
2140
2141     dri_bo_unreference(pp_context->vfe_state.bo);
2142     bo = dri_bo_alloc(i965->intel.bufmgr, 
2143                       "vfe state", 
2144                       sizeof(struct i965_vfe_state), 
2145                       4096);
2146     assert(bo);
2147     pp_context->vfe_state.bo = bo;
2148     
2149     for (i = 0; i < MAX_PP_SURFACES; i++) {
2150         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2151         pp_context->surfaces[i].ss_bo = NULL;
2152
2153         dri_bo_unreference(pp_context->surfaces[i].s_bo);
2154         pp_context->surfaces[i].s_bo = NULL;
2155     }
2156
2157     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
2158     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
2159     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
2160     assert(pp_modules);
2161     pp_context->current_pp = pp_index;
2162     pp_module = &pp_modules[pp_index];
2163     
2164     if (pp_module->initialize)
2165         pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
2166 }
2167
2168 static void
2169 gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
2170 {
2171     unsigned int *binding_table;
2172     dri_bo *bo = pp_context->binding_table.bo;
2173     int i;
2174
2175     dri_bo_map(bo, 1);
2176     assert(bo->virtual);
2177     binding_table = bo->virtual;
2178     memset(binding_table, 0, bo->size);
2179
2180     for (i = 0; i < MAX_PP_SURFACES; i++) {
2181         if (pp_context->surfaces[i].ss_bo) {
2182             assert(pp_context->surfaces[i].s_bo);
2183
2184             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
2185             dri_bo_emit_reloc(bo,
2186                               I915_GEM_DOMAIN_INSTRUCTION, 0,
2187                               0,
2188                               i * sizeof(*binding_table),
2189                               pp_context->surfaces[i].ss_bo);
2190         }
2191     
2192     }
2193
2194     dri_bo_unmap(bo);
2195 }
2196
2197 static void
2198 gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
2199 {
2200     struct gen6_interface_descriptor_data *desc;
2201     dri_bo *bo;
2202     int pp_index = pp_context->current_pp;
2203
2204     bo = pp_context->idrt.bo;
2205     dri_bo_map(bo, True);
2206     assert(bo->virtual);
2207     desc = bo->virtual;
2208     memset(desc, 0, sizeof(*desc));
2209     desc->desc0.kernel_start_pointer = 
2210         pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
2211     desc->desc1.single_program_flow = 1;
2212     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
2213     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
2214     desc->desc2.sampler_state_pointer = 
2215         pp_context->sampler_state_table.bo->offset >> 5;
2216     desc->desc3.binding_table_entry_count = 0;
2217     desc->desc3.binding_table_pointer = 
2218         pp_context->binding_table.bo->offset >> 5; /*reloc */
2219     desc->desc4.constant_urb_entry_read_offset = 0;
2220     desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
2221
2222     dri_bo_emit_reloc(bo,
2223                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2224                       0,
2225                       offsetof(struct gen6_interface_descriptor_data, desc0),
2226                       pp_modules[pp_index].kernel.bo);
2227
2228     dri_bo_emit_reloc(bo,
2229                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2230                       desc->desc2.sampler_count << 2,
2231                       offsetof(struct gen6_interface_descriptor_data, desc2),
2232                       pp_context->sampler_state_table.bo);
2233
2234     dri_bo_emit_reloc(bo,
2235                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2236                       desc->desc3.binding_table_entry_count,
2237                       offsetof(struct gen6_interface_descriptor_data, desc3),
2238                       pp_context->binding_table.bo);
2239
2240     dri_bo_unmap(bo);
2241     pp_context->idrt.num_interface_descriptors++;
2242 }
2243
2244 static void
2245 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
2246 {
2247     unsigned char *constant_buffer;
2248
2249     assert(sizeof(pp_static_parameter) == 128);
2250     dri_bo_map(pp_context->curbe.bo, 1);
2251     assert(pp_context->curbe.bo->virtual);
2252     constant_buffer = pp_context->curbe.bo->virtual;
2253     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
2254     dri_bo_unmap(pp_context->curbe.bo);
2255 }
2256
2257 static void
2258 gen6_pp_states_setup(VADriverContextP ctx)
2259 {
2260     struct i965_driver_data *i965 = i965_driver_data(ctx);
2261     struct i965_post_processing_context *pp_context = i965->pp_context;
2262
2263     gen6_pp_binding_table(pp_context);
2264     gen6_pp_interface_descriptor_table(pp_context);
2265     gen6_pp_upload_constants(pp_context);
2266 }
2267
2268 static void
2269 gen6_pp_pipeline_select(VADriverContextP ctx)
2270 {
2271     BEGIN_BATCH(ctx, 1);
2272     OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
2273     ADVANCE_BATCH(ctx);
2274 }
2275
2276 static void
2277 gen6_pp_state_base_address(VADriverContextP ctx)
2278 {
2279     BEGIN_BATCH(ctx, 10);
2280     OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | (10 - 2));
2281     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2282     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2283     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2284     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2285     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2286     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2287     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2288     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2289     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2290     ADVANCE_BATCH(ctx);
2291 }
2292
2293 static void
2294 gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2295 {
2296     BEGIN_BATCH(ctx, 8);
2297     OUT_BATCH(ctx, CMD_MEDIA_VFE_STATE | (8 - 2));
2298     OUT_BATCH(ctx, 0);
2299     OUT_BATCH(ctx,
2300               (pp_context->urb.num_vfe_entries - 1) << 16 |
2301               pp_context->urb.num_vfe_entries << 8);
2302     OUT_BATCH(ctx, 0);
2303     OUT_BATCH(ctx,
2304               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
2305               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
2306     OUT_BATCH(ctx, 0);
2307     OUT_BATCH(ctx, 0);
2308     OUT_BATCH(ctx, 0);
2309     ADVANCE_BATCH(ctx);
2310 }
2311
2312 static void
2313 gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2314 {
2315     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);
2316
2317     BEGIN_BATCH(ctx, 4);
2318     OUT_BATCH(ctx, CMD_MEDIA_CURBE_LOAD | (4 - 2));
2319     OUT_BATCH(ctx, 0);
2320     OUT_BATCH(ctx,
2321               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
2322     OUT_RELOC(ctx, 
2323               pp_context->curbe.bo,
2324               I915_GEM_DOMAIN_INSTRUCTION, 0,
2325               0);
2326     ADVANCE_BATCH(ctx);
2327 }
2328
2329 static void
2330 gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2331 {
2332     BEGIN_BATCH(ctx, 4);
2333     OUT_BATCH(ctx, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
2334     OUT_BATCH(ctx, 0);
2335     OUT_BATCH(ctx,
2336               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
2337     OUT_RELOC(ctx, 
2338               pp_context->idrt.bo,
2339               I915_GEM_DOMAIN_INSTRUCTION, 0,
2340               0);
2341     ADVANCE_BATCH(ctx);
2342 }
2343
2344 static void
2345 gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2346 {
2347     int x, x_steps, y, y_steps;
2348
2349     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
2350     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
2351
2352     for (y = 0; y < y_steps; y++) {
2353         for (x = 0; x < x_steps; x++) {
2354             if (!pp_context->pp_set_block_parameter(&pp_context->private_context, x, y)) {
2355                 BEGIN_BATCH(ctx, 22);
2356                 OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 20);
2357                 OUT_BATCH(ctx, 0);
2358                 OUT_BATCH(ctx, 0); /* no indirect data */
2359                 OUT_BATCH(ctx, 0);
2360                 OUT_BATCH(ctx, 0); /* scoreboard */
2361                 OUT_BATCH(ctx, 0);
2362
2363                 /* inline data grf 5-6 */
2364                 assert(sizeof(pp_inline_parameter) == 64);
2365                 intel_batchbuffer_data(ctx, &pp_inline_parameter, sizeof(pp_inline_parameter));
2366
2367                 ADVANCE_BATCH(ctx);
2368             }
2369         }
2370     }
2371 }
2372
2373 static void
2374 gen6_pp_pipeline_setup(VADriverContextP ctx)
2375 {
2376     struct i965_driver_data *i965 = i965_driver_data(ctx);
2377     struct i965_post_processing_context *pp_context = i965->pp_context;
2378
2379     intel_batchbuffer_start_atomic(ctx, 0x1000);
2380     intel_batchbuffer_emit_mi_flush(ctx);
2381     gen6_pp_pipeline_select(ctx);
2382     gen6_pp_curbe_load(ctx, pp_context);
2383     gen6_interface_descriptor_load(ctx, pp_context);
2384     gen6_pp_state_base_address(ctx);
2385     gen6_pp_vfe_state(ctx, pp_context);
2386     gen6_pp_object_walker(ctx, pp_context);
2387     intel_batchbuffer_end_atomic(ctx);
2388 }
2389
2390 static void
2391 gen6_post_processing(VADriverContextP ctx,
2392                      VASurfaceID surface,
2393                      int input,
2394                      short srcx,
2395                      short srcy,
2396                      unsigned short srcw,
2397                      unsigned short srch,
2398                      short destx,
2399                      short desty,
2400                      unsigned short destw,
2401                      unsigned short desth,
2402                      int pp_index)
2403 {
2404     gen6_pp_initialize(ctx, surface, input,
2405                        srcx, srcy, srcw, srch,
2406                        destx, desty, destw, desth,
2407                        pp_index);
2408     gen6_pp_states_setup(ctx);
2409     gen6_pp_pipeline_setup(ctx);
2410 }
2411
2412 static void
2413 i965_post_processing_internal(VADriverContextP ctx,
2414                               VASurfaceID surface,
2415                               int input,
2416                               short srcx,
2417                               short srcy,
2418                               unsigned short srcw,
2419                               unsigned short srch,
2420                               short destx,
2421                               short desty,
2422                               unsigned short destw,
2423                               unsigned short desth,
2424                               int pp_index)
2425 {
2426     struct i965_driver_data *i965 = i965_driver_data(ctx);
2427
2428     if (IS_GEN6(i965->intel.device_id))
2429         gen6_post_processing(ctx, surface, input,
2430                              srcx, srcy, srcw, srch,
2431                              destx, desty, destw, desth,
2432                              pp_index);
2433     else
2434         ironlake_post_processing(ctx, surface, input,
2435                                  srcx, srcy, srcw, srch,
2436                                  destx, desty, destw, desth,
2437                                  pp_index);
2438 }
2439
2440 void
2441 i965_post_processing(VADriverContextP ctx,
2442                      VASurfaceID surface,
2443                      short srcx,
2444                      short srcy,
2445                      unsigned short srcw,
2446                      unsigned short srch,
2447                      short destx,
2448                      short desty,
2449                      unsigned short destw,
2450                      unsigned short desth,
2451                      unsigned int flag)
2452 {
2453     struct i965_driver_data *i965 = i965_driver_data(ctx);
2454
2455     if (HAS_PP(i965)) {
2456         /* Currently only support post processing for NV12 surface */
2457         if (i965->render_state.interleaved_uv) {
2458             int internal_input = 0;
2459
2460             if (flag & I965_PP_FLAG_DEINTERLACING) {
2461                 i965_post_processing_internal(ctx, surface, internal_input,
2462                                               srcx, srcy, srcw, srch,
2463                                               destx, desty, destw, desth,
2464                                               PP_NV12_DNDI);
2465                 internal_input = 1;
2466             }
2467
2468             if (flag & I965_PP_FLAG_AVS) {
2469                 i965_post_processing_internal(ctx, surface, internal_input,
2470                                               srcx, srcy, srcw, srch,
2471                                               destx, desty, destw, desth,
2472                                               PP_NV12_AVS);
2473             }
2474         }
2475     }
2476 }       
2477
2478 Bool
2479 i965_post_processing_terminate(VADriverContextP ctx)
2480 {
2481     struct i965_driver_data *i965 = i965_driver_data(ctx);
2482     struct i965_post_processing_context *pp_context = i965->pp_context;
2483     int i;
2484
2485     if (HAS_PP(i965)) {
2486         if (pp_context) {
2487             dri_bo_unreference(pp_context->curbe.bo);
2488             pp_context->curbe.bo = NULL;
2489
2490             for (i = 0; i < MAX_PP_SURFACES; i++) {
2491                 dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2492                 pp_context->surfaces[i].ss_bo = NULL;
2493
2494                 dri_bo_unreference(pp_context->surfaces[i].s_bo);
2495                 pp_context->surfaces[i].s_bo = NULL;
2496             }
2497
2498             dri_bo_unreference(pp_context->sampler_state_table.bo);
2499             pp_context->sampler_state_table.bo = NULL;
2500
2501             dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2502             pp_context->sampler_state_table.bo_8x8 = NULL;
2503
2504             dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2505             pp_context->sampler_state_table.bo_8x8_uv = NULL;
2506
2507             dri_bo_unreference(pp_context->binding_table.bo);
2508             pp_context->binding_table.bo = NULL;
2509
2510             dri_bo_unreference(pp_context->idrt.bo);
2511             pp_context->idrt.bo = NULL;
2512             pp_context->idrt.num_interface_descriptors = 0;
2513
2514             dri_bo_unreference(pp_context->vfe_state.bo);
2515             pp_context->vfe_state.bo = NULL;
2516
2517             dri_bo_unreference(pp_context->stmm.bo);
2518             pp_context->stmm.bo = NULL;
2519
2520             free(pp_context);
2521         }
2522
2523         i965->pp_context = NULL;
2524
2525         for (i = 0; i < NUM_PP_MODULES && pp_modules; i++) {
2526             struct pp_module *pp_module = &pp_modules[i];
2527
2528             dri_bo_unreference(pp_module->kernel.bo);
2529             pp_module->kernel.bo = NULL;
2530         }
2531     }
2532
2533     return True;
2534 }
2535
2536 Bool
2537 i965_post_processing_init(VADriverContextP ctx)
2538 {
2539     struct i965_driver_data *i965 = i965_driver_data(ctx);
2540     struct i965_post_processing_context *pp_context = i965->pp_context;
2541     int i;
2542
2543     if (HAS_PP(i965)) {
2544         if (pp_context == NULL) {
2545             pp_context = calloc(1, sizeof(*pp_context));
2546             i965->pp_context = pp_context;
2547         }
2548
2549         pp_context->urb.size = URB_SIZE((&i965->intel));
2550         pp_context->urb.num_vfe_entries = 32;
2551         pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
2552         pp_context->urb.num_cs_entries = 1;
2553         pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
2554         pp_context->urb.vfe_start = 0;
2555         pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2556             pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2557         assert(pp_context->urb.cs_start + 
2558                pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2559
2560         assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2561
2562         if (IS_GEN6(i965->intel.device_id))
2563             pp_modules = pp_modules_gen6;
2564         else if (IS_IRONLAKE(i965->intel.device_id)) {
2565             pp_modules = pp_modules_gen5;
2566         }
2567
2568         for (i = 0; i < NUM_PP_MODULES && pp_modules; i++) {
2569             struct pp_module *pp_module = &pp_modules[i];
2570             dri_bo_unreference(pp_module->kernel.bo);
2571             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
2572                                                 pp_module->kernel.name,
2573                                                 pp_module->kernel.size,
2574                                                 4096);
2575             assert(pp_module->kernel.bo);
2576             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
2577         }
2578     }
2579
2580     return True;
2581 }