i965_drv_video: store kernel info in the corresponding context
[platform/upstream/libva.git] / i965_drv_video / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
40 #include "i965_drv_video.h"
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
43
/*
 * Non-zero when the device behind the given driver data is an Ironlake or
 * a Gen6 part (presumably the only devices this file has post-processing
 * kernels for -- confirm against the callers of HAS_PP).
 */
#define HAS_PP(ctx)                                 \
    (IS_IRONLAKE((ctx)->intel.device_id) ||         \
     IS_GEN6((ctx)->intel.device_id))
46
47 static const uint32_t pp_null_gen5[][4] = {
48 #include "shaders/post_processing/null.g4b.gen5"
49 };
50
51 static const uint32_t pp_nv12_load_save_gen5[][4] = {
52 #include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
53 };
54
55 static const uint32_t pp_nv12_scaling_gen5[][4] = {
56 #include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
57 };
58
59 static const uint32_t pp_nv12_avs_gen5[][4] = {
60 #include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
61 };
62
63 static const uint32_t pp_nv12_dndi_gen5[][4] = {
64 #include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
65 };
66
67 static void pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
68                                unsigned short srcw, unsigned short srch,
69                                unsigned short destw, unsigned short desth);
70 static void pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
71                                    unsigned short srcw, unsigned short srch,
72                                    unsigned short destw, unsigned short desth);
73 static void pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
74                                        unsigned short srcw, unsigned short srch,
75                                        unsigned short destw, unsigned short desth);
76 static void pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
77                                          unsigned short srcw, unsigned short srch,
78                                          unsigned short destw, unsigned short desth);
79 static void pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
80                                     unsigned short srcw, unsigned short srch,
81                                     unsigned short destw, unsigned short desth);
82
83 static struct pp_module pp_modules_gen5[] = {
84     {
85         {
86             "NULL module (for testing)",
87             PP_NULL,
88             pp_null_gen5,
89             sizeof(pp_null_gen5),
90             NULL,
91         },
92
93         pp_null_initialize,
94     },
95
96     {
97         {
98             "NV12 Load & Save module",
99             PP_NV12_LOAD_SAVE,
100             pp_nv12_load_save_gen5,
101             sizeof(pp_nv12_load_save_gen5),
102             NULL,
103         },
104
105         pp_nv12_load_save_initialize,
106     },
107
108     {
109         {
110             "NV12 Scaling module",
111             PP_NV12_SCALING,
112             pp_nv12_scaling_gen5,
113             sizeof(pp_nv12_scaling_gen5),
114             NULL,
115         },
116
117         pp_nv12_scaling_initialize,
118     },
119
120     {
121         {
122             "NV12 AVS module",
123             PP_NV12_AVS,
124             pp_nv12_avs_gen5,
125             sizeof(pp_nv12_avs_gen5),
126             NULL,
127         },
128
129         pp_nv12_avs_initialize,
130     },
131
132     {
133         {
134             "NV12 DNDI module",
135             PP_NV12_DNDI,
136             pp_nv12_dndi_gen5,
137             sizeof(pp_nv12_dndi_gen5),
138             NULL,
139         },
140
141         pp_nv12_dndi_initialize,
142     },
143 };
144
145 static const uint32_t pp_null_gen6[][4] = {
146 #include "shaders/post_processing/null.g6b"
147 };
148
149 static const uint32_t pp_nv12_load_save_gen6[][4] = {
150 #include "shaders/post_processing/nv12_load_save_nv12.g6b"
151 };
152
153 static const uint32_t pp_nv12_scaling_gen6[][4] = {
154 #include "shaders/post_processing/nv12_scaling_nv12.g6b"
155 };
156
157 static const uint32_t pp_nv12_avs_gen6[][4] = {
158 #include "shaders/post_processing/nv12_avs_nv12.g6b"
159 };
160
161 static const uint32_t pp_nv12_dndi_gen6[][4] = {
162 #include "shaders/post_processing/nv12_dndi_nv12.g6b"
163 };
164
165 static struct pp_module pp_modules_gen6[] = {
166     {
167         {
168             "NULL module (for testing)",
169             PP_NULL,
170             pp_null_gen6,
171             sizeof(pp_null_gen6),
172             NULL,
173         },
174
175         pp_null_initialize,
176     },
177
178     {
179         {
180             "NV12 Load & Save module",
181             PP_NV12_LOAD_SAVE,
182             pp_nv12_load_save_gen6,
183             sizeof(pp_nv12_load_save_gen6),
184             NULL,
185         },
186
187         pp_nv12_load_save_initialize,
188     },
189
190     {
191         {
192             "NV12 Scaling module",
193             PP_NV12_SCALING,
194             pp_nv12_scaling_gen6,
195             sizeof(pp_nv12_scaling_gen6),
196             NULL,
197         },
198
199         pp_nv12_scaling_initialize,
200     },
201
202     {
203         {
204             "NV12 AVS module",
205             PP_NV12_AVS,
206             pp_nv12_avs_gen6,
207             sizeof(pp_nv12_avs_gen6),
208             NULL,
209         },
210
211         pp_nv12_avs_initialize,
212     },
213
214     {
215         {
216             "NV12 DNDI module",
217             PP_NV12_DNDI,
218             pp_nv12_dndi_gen6,
219             sizeof(pp_nv12_dndi_gen6),
220             NULL,
221         },
222
223         pp_nv12_dndi_initialize,
224     },
225 };
226
/*
 * Static kernel parameters: four groups of eight dwords (grf1..grf4),
 * 128 bytes in total.  The whole structure is copied verbatim into the
 * constant buffer, so field order and widths must not change.
 *
 * The "rN.M" tags give register N, dword M; the word in front of each tag
 * names the kernel feature that consumes the field.
 */
struct pp_static_parameter
{
    struct {
        float procamp_constant_c0;                      /* r1.0 Procamp */

        /* r1.1 Load and Save */
        unsigned int source_packed_y_offset:8;
        unsigned int source_packed_u_offset:8;
        unsigned int source_packed_v_offset:8;
        unsigned int pad0:8;

        /* r1.2 shared between Load and Save and CSC */
        union {
            struct {                                    /* Load and Save */
                unsigned int destination_packed_y_offset:8;
                unsigned int destination_packed_u_offset:8;
                unsigned int destination_packed_v_offset:8;
                unsigned int pad0:8;
            } load_and_save;

            struct {                                    /* CSC */
                unsigned int destination_rgb_format:8;
                unsigned int pad0:24;
            } csc;
        } r1_2;

        float procamp_constant_c1;                      /* r1.3 Procamp */
        float procamp_constant_c2;                      /* r1.4 Procamp */

        /* r1.5 DI */
        unsigned int statistics_surface_picth:16;       /* Divided by 2 */
        unsigned int pad1:16;

        /* r1.6 shared between DI and AVS/Scaling */
        union {
            struct {                                    /* DI */
                unsigned int pad0:24;
                unsigned int top_field_first:8;
            } di;

            float normalized_video_y_scaling_step;      /* AVS/Scaling */
        } r1_6;

        float procamp_constant_c5;                      /* r1.7 Procamp */
    } grf1;

    struct {
        float procamp_constant_c3;                      /* r2.0 Procamp */
        unsigned int pad0;                              /* r2.1 MBZ */
        float wg_csc_constant_c4;                       /* r2.2 WG+CSC */
        float wg_csc_constant_c8;                       /* r2.3 WG+CSC */
        float procamp_constant_c4;                      /* r2.4 Procamp */
        unsigned int pad1;                              /* r2.5 MBZ */
        unsigned int pad2;                              /* r2.6 MBZ */
        float wg_csc_constant_c9;                       /* r2.7 WG+CSC */
    } grf2;

    struct {
        float wg_csc_constant_c0;                       /* r3.0 WG+CSC */
        float scaling_step_ratio;                       /* r3.1 Blending */
        float normalized_alpha_y_scaling;               /* r3.2 Blending */
        float wg_csc_constant_c4;                       /* r3.3 WG+CSC */
        float wg_csc_constant_c1;                       /* r3.4 WG+CSC */

        /* r3.5 ALL */
        int horizontal_origin_offset:16;
        int vertical_origin_offset:16;

        /* r3.6 shared between colour fill and WG+CSC */
        union {
            unsigned int color_pixel;                   /* Color fill */
            float wg_csc_constant_c2;                   /* WG+CSC */
        } r3_6;

        float wg_csc_constant_c3;                       /* r3.7 WG+CSC */
    } grf3;

    struct {
        float wg_csc_constant_c6;                       /* r4.0 WG+CSC */
        unsigned int pad0;                              /* r4.1 ALL, MBZ ??? */

        /* r4.2 shared between AVS and DI */
        union {
            struct {                                    /* AVS */
                unsigned int pad1:15;
                unsigned int nlas:1;
                unsigned int pad2:16;
            } avs;

            struct {                                    /* DI */
                unsigned int motion_history_coefficient_m2:8;
                unsigned int motion_history_coefficient_m1:8;
                unsigned int pad0:16;
            } di;
        } r4_2;

        float wg_csc_constant_c7;                       /* r4.3 WG+CSC */
        float wg_csc_constant_c10;                      /* r4.4 WG+CSC */
        float source_video_frame_normalized_horizontal_origin; /* r4.5 AVS */
        unsigned int pad1;                              /* r4.6 MBZ */
        float wg_csc_constant_c11;                      /* r4.7 WG+CSC */
    } grf4;
};
379
/*
 * Per-block kernel parameters: two groups of eight dwords (grf5 and grf6),
 * 64 bytes in total.  The structure is emitted as inline data after each
 * MEDIA_OBJECT command, so field order and widths must not change.
 *
 * The "rN.M" tags give register N, dword M.
 */
struct pp_inline_parameter
{
    struct {
        /* r5.0 ALL */
        int destination_block_horizontal_origin:16;
        int destination_block_vertical_origin:16;

        /* r5.1 shared between AVS/Scaling and FMD */
        union {
            float source_surface_block_normalized_horizontal_origin; /* AVS/Scaling */

            struct {                                    /* FMD */
                unsigned int variance_surface_vertical_origin:16;
                unsigned int pad0:16;
            } fmd;
        } r5_1;

        float source_surface_block_normalized_vertical_origin;   /* r5.2 AVS/Scaling */
        float alpha_surface_block_normalized_horizontal_origin;  /* r5.3 Alpha */
        float alpha_surface_block_normalized_vertical_origin;    /* r5.4 Alpha */

        /* r5.5 Alpha */
        unsigned int alpha_mask_x:16;
        unsigned int alpha_mask_y:8;
        unsigned int block_count_x:8;

        /* r5.6 */
        unsigned int block_horizontal_mask:16;
        unsigned int block_vertical_mask:8;
        unsigned int number_blocks:8;

        float normalized_video_x_scaling_step;          /* r5.7 AVS/Scaling */
    } grf5;

    struct {
        float video_step_delta;                         /* r6.0 AVS */
        unsigned int padx[7];                           /* r6.1-r6.7 */
    } grf6;
};
430
431 static struct pp_static_parameter pp_static_parameter;
432 static struct pp_inline_parameter pp_inline_parameter;
433
434 static void
435 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
436 {
437     switch (tiling) {
438     case I915_TILING_NONE:
439         ss->ss3.tiled_surface = 0;
440         ss->ss3.tile_walk = 0;
441         break;
442     case I915_TILING_X:
443         ss->ss3.tiled_surface = 1;
444         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
445         break;
446     case I915_TILING_Y:
447         ss->ss3.tiled_surface = 1;
448         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
449         break;
450     }
451 }
452
453 static void
454 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
455 {
456     switch (tiling) {
457     case I915_TILING_NONE:
458         ss->ss2.tiled_surface = 0;
459         ss->ss2.tile_walk = 0;
460         break;
461     case I915_TILING_X:
462         ss->ss2.tiled_surface = 1;
463         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
464         break;
465     case I915_TILING_Y:
466         ss->ss2.tiled_surface = 1;
467         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
468         break;
469     }
470 }
471
/*
 * Intentionally empty: there is nothing to do at this stage.  The
 * load/save initialise hook in this file, for one, builds its surface
 * states itself.
 */
static void
ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
{
    (void)pp_context;
}
477
478 static void
479 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
480 {
481     struct i965_interface_descriptor *desc;
482     dri_bo *bo;
483     int pp_index = pp_context->current_pp;
484
485     bo = pp_context->idrt.bo;
486     dri_bo_map(bo, 1);
487     assert(bo->virtual);
488     desc = bo->virtual;
489     memset(desc, 0, sizeof(*desc));
490     desc->desc0.grf_reg_blocks = 10;
491     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
492     desc->desc1.const_urb_entry_read_offset = 0;
493     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
494     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
495     desc->desc2.sampler_count = 0;
496     desc->desc3.binding_table_entry_count = 0;
497     desc->desc3.binding_table_pointer = 
498         pp_context->binding_table.bo->offset >> 5; /*reloc */
499
500     dri_bo_emit_reloc(bo,
501                       I915_GEM_DOMAIN_INSTRUCTION, 0,
502                       desc->desc0.grf_reg_blocks,
503                       offsetof(struct i965_interface_descriptor, desc0),
504                       pp_context->pp_modules[pp_index].kernel.bo);
505
506     dri_bo_emit_reloc(bo,
507                       I915_GEM_DOMAIN_INSTRUCTION, 0,
508                       desc->desc2.sampler_count << 2,
509                       offsetof(struct i965_interface_descriptor, desc2),
510                       pp_context->sampler_state_table.bo);
511
512     dri_bo_emit_reloc(bo,
513                       I915_GEM_DOMAIN_INSTRUCTION, 0,
514                       desc->desc3.binding_table_entry_count,
515                       offsetof(struct i965_interface_descriptor, desc3),
516                       pp_context->binding_table.bo);
517
518     dri_bo_unmap(bo);
519     pp_context->idrt.num_interface_descriptors++;
520 }
521
522 static void
523 ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
524 {
525     unsigned int *binding_table;
526     dri_bo *bo = pp_context->binding_table.bo;
527     int i;
528
529     dri_bo_map(bo, 1);
530     assert(bo->virtual);
531     binding_table = bo->virtual;
532     memset(binding_table, 0, bo->size);
533
534     for (i = 0; i < MAX_PP_SURFACES; i++) {
535         if (pp_context->surfaces[i].ss_bo) {
536             assert(pp_context->surfaces[i].s_bo);
537
538             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
539             dri_bo_emit_reloc(bo,
540                               I915_GEM_DOMAIN_INSTRUCTION, 0,
541                               0,
542                               i * sizeof(*binding_table),
543                               pp_context->surfaces[i].ss_bo);
544         }
545     
546     }
547
548     dri_bo_unmap(bo);
549 }
550
551 static void
552 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
553 {
554     struct i965_vfe_state *vfe_state;
555     dri_bo *bo;
556
557     bo = pp_context->vfe_state.bo;
558     dri_bo_map(bo, 1);
559     assert(bo->virtual);
560     vfe_state = bo->virtual;
561     memset(vfe_state, 0, sizeof(*vfe_state));
562     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
563     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
564     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
565     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
566     vfe_state->vfe1.children_present = 0;
567     vfe_state->vfe2.interface_descriptor_base = 
568         pp_context->idrt.bo->offset >> 4; /* reloc */
569     dri_bo_emit_reloc(bo,
570                       I915_GEM_DOMAIN_INSTRUCTION, 0,
571                       0,
572                       offsetof(struct i965_vfe_state, vfe2),
573                       pp_context->idrt.bo);
574     dri_bo_unmap(bo);
575 }
576
577 static void
578 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
579 {
580     unsigned char *constant_buffer;
581
582     assert(sizeof(pp_static_parameter) == 128);
583     dri_bo_map(pp_context->curbe.bo, 1);
584     assert(pp_context->curbe.bo->virtual);
585     constant_buffer = pp_context->curbe.bo->virtual;
586     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
587     dri_bo_unmap(pp_context->curbe.bo);
588 }
589
590 static void
591 ironlake_pp_states_setup(VADriverContextP ctx)
592 {
593     struct i965_driver_data *i965 = i965_driver_data(ctx);
594     struct i965_post_processing_context *pp_context = i965->pp_context;
595
596     ironlake_pp_surface_state(pp_context);
597     ironlake_pp_binding_table(pp_context);
598     ironlake_pp_interface_descriptor_table(pp_context);
599     ironlake_pp_vfe_state(pp_context);
600     ironlake_pp_upload_constants(pp_context);
601 }
602
603 static void
604 ironlake_pp_pipeline_select(VADriverContextP ctx)
605 {
606     BEGIN_BATCH(ctx, 1);
607     OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
608     ADVANCE_BATCH(ctx);
609 }
610
611 static void
612 ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
613 {
614     unsigned int vfe_fence, cs_fence;
615
616     vfe_fence = pp_context->urb.cs_start;
617     cs_fence = pp_context->urb.size;
618
619     BEGIN_BATCH(ctx, 3);
620     OUT_BATCH(ctx, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
621     OUT_BATCH(ctx, 0);
622     OUT_BATCH(ctx, 
623               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
624               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
625     ADVANCE_BATCH(ctx);
626 }
627
628 static void
629 ironlake_pp_state_base_address(VADriverContextP ctx)
630 {
631     BEGIN_BATCH(ctx, 8);
632     OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
633     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
634     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
635     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
636     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
637     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
638     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
639     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
640     ADVANCE_BATCH(ctx);
641 }
642
643 static void
644 ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
645 {
646     BEGIN_BATCH(ctx, 3);
647     OUT_BATCH(ctx, CMD_MEDIA_STATE_POINTERS | 1);
648     OUT_BATCH(ctx, 0);
649     OUT_RELOC(ctx, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
650     ADVANCE_BATCH(ctx);
651 }
652
653 static void 
654 ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
655 {
656     BEGIN_BATCH(ctx, 2);
657     OUT_BATCH(ctx, CMD_CS_URB_STATE | 0);
658     OUT_BATCH(ctx,
659               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
660               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
661     ADVANCE_BATCH(ctx);
662 }
663
664 static void
665 ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
666 {
667     BEGIN_BATCH(ctx, 2);
668     OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
669     OUT_RELOC(ctx, pp_context->curbe.bo,
670               I915_GEM_DOMAIN_INSTRUCTION, 0,
671               pp_context->urb.size_cs_entry - 1);
672     ADVANCE_BATCH(ctx);    
673 }
674
675 static void
676 ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
677 {
678     int x, x_steps, y, y_steps;
679
680     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
681     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
682
683     for (y = 0; y < y_steps; y++) {
684         for (x = 0; x < x_steps; x++) {
685             if (!pp_context->pp_set_block_parameter(&pp_context->private_context, x, y)) {
686                 BEGIN_BATCH(ctx, 20);
687                 OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 18);
688                 OUT_BATCH(ctx, 0);
689                 OUT_BATCH(ctx, 0); /* no indirect data */
690                 OUT_BATCH(ctx, 0);
691
692                 /* inline data grf 5-6 */
693                 assert(sizeof(pp_inline_parameter) == 64);
694                 intel_batchbuffer_data(ctx, &pp_inline_parameter, sizeof(pp_inline_parameter));
695
696                 ADVANCE_BATCH(ctx);
697             }
698         }
699     }
700 }
701
702 static void
703 ironlake_pp_pipeline_setup(VADriverContextP ctx)
704 {
705     struct i965_driver_data *i965 = i965_driver_data(ctx);
706     struct i965_post_processing_context *pp_context = i965->pp_context;
707
708     intel_batchbuffer_start_atomic(ctx, 0x1000);
709     intel_batchbuffer_emit_mi_flush(ctx);
710     ironlake_pp_pipeline_select(ctx);
711     ironlake_pp_state_base_address(ctx);
712     ironlake_pp_state_pointers(ctx, pp_context);
713     ironlake_pp_urb_layout(ctx, pp_context);
714     ironlake_pp_cs_urb_layout(ctx, pp_context);
715     ironlake_pp_constant_buffer(ctx, pp_context);
716     ironlake_pp_object_walker(ctx, pp_context);
717     intel_batchbuffer_end_atomic(ctx);
718 }
719
/* NULL module: the walker visits a single column. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
725
/* NULL module: the walker visits a single row. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
731
/*
 * NULL module: no per-block parameters to set.  Always returns 0, which
 * the object walker treats as "emit this block".
 */
static int
pp_null_set_block_parameter(void *private_context, int x, int y)
{
    (void)private_context;
    (void)x;
    (void)y;

    return 0;
}
737
738 static void
739 pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
740                    unsigned short srcw, unsigned short srch,
741                    unsigned short destw, unsigned short desth)
742 {
743     struct i965_driver_data *i965 = i965_driver_data(ctx);
744     struct i965_post_processing_context *pp_context = i965->pp_context;
745     struct object_surface *obj_surface;
746
747     /* surface */
748     obj_surface = SURFACE(surface);
749     dri_bo_unreference(obj_surface->pp_out_bo);
750     obj_surface->pp_out_bo = obj_surface->bo;
751     dri_bo_reference(obj_surface->pp_out_bo);
752     assert(obj_surface->pp_out_bo);
753     obj_surface->pp_out_width = obj_surface->width;
754     obj_surface->pp_out_height = obj_surface->height;
755     obj_surface->orig_pp_out_width = obj_surface->orig_width;
756     obj_surface->orig_pp_out_height = obj_surface->orig_height;
757
758     /* private function & data */
759     pp_context->pp_x_steps = pp_null_x_steps;
760     pp_context->pp_y_steps = pp_null_y_steps;
761     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
762 }
763
/* Load/save module: the walker visits a single column. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
769
770 static int
771 pp_load_save_y_steps(void *private_context)
772 {
773     struct pp_load_save_context *pp_load_save_context = private_context;
774
775     return pp_load_save_context->dest_h / 8;
776 }
777
778 static int
779 pp_load_save_set_block_parameter(void *private_context, int x, int y)
780 {
781     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
782     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
783     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
784     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
785
786     return 0;
787 }
788
789 static void
790 pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
791                              unsigned short srcw, unsigned short srch,
792                              unsigned short destw, unsigned short desth)
793 {
794     struct i965_driver_data *i965 = i965_driver_data(ctx);
795     struct i965_post_processing_context *pp_context = i965->pp_context;
796     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
797     struct object_surface *obj_surface;
798     struct i965_surface_state *ss;
799     dri_bo *bo;
800     int index, w, h;
801     int orig_w, orig_h;
802     unsigned int tiling, swizzle;
803
804     /* surface */
805     obj_surface = SURFACE(surface);
806     orig_w = obj_surface->orig_width;
807     orig_h = obj_surface->orig_height;
808     w = obj_surface->width;
809     h = obj_surface->height;
810
811     dri_bo_unreference(obj_surface->pp_out_bo);
812     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
813                                           "intermediate surface",
814                                           SIZE_YUV420(w, h),
815                                           4096);
816     assert(obj_surface->pp_out_bo);
817     obj_surface->pp_out_width = obj_surface->width;
818     obj_surface->pp_out_height = obj_surface->height;
819     obj_surface->orig_pp_out_width = obj_surface->orig_width;
820     obj_surface->orig_pp_out_height = obj_surface->orig_height;
821
822     /* source Y surface index 1 */
823     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
824
825     index = 1;
826     pp_context->surfaces[index].s_bo = obj_surface->bo;
827     dri_bo_reference(pp_context->surfaces[index].s_bo);
828     bo = dri_bo_alloc(i965->intel.bufmgr, 
829                       "surface state", 
830                       sizeof(struct i965_surface_state), 
831                       4096);
832     assert(bo);
833     pp_context->surfaces[index].ss_bo = bo;
834     dri_bo_map(bo, True);
835     assert(bo->virtual);
836     ss = bo->virtual;
837     memset(ss, 0, sizeof(*ss));
838     ss->ss0.surface_type = I965_SURFACE_2D;
839     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
840     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
841     ss->ss2.width = orig_w / 4 - 1;
842     ss->ss2.height = orig_h - 1;
843     ss->ss3.pitch = w - 1;
844     pp_set_surface_tiling(ss, tiling);
845     dri_bo_emit_reloc(bo,
846                       I915_GEM_DOMAIN_RENDER, 
847                       0,
848                       0,
849                       offsetof(struct i965_surface_state, ss1),
850                       pp_context->surfaces[index].s_bo);
851     dri_bo_unmap(bo);
852
853     /* source UV surface index 2 */
854     index = 2;
855     pp_context->surfaces[index].s_bo = obj_surface->bo;
856     dri_bo_reference(pp_context->surfaces[index].s_bo);
857     bo = dri_bo_alloc(i965->intel.bufmgr, 
858                       "surface state", 
859                       sizeof(struct i965_surface_state), 
860                       4096);
861     assert(bo);
862     pp_context->surfaces[index].ss_bo = bo;
863     dri_bo_map(bo, True);
864     assert(bo->virtual);
865     ss = bo->virtual;
866     memset(ss, 0, sizeof(*ss));
867     ss->ss0.surface_type = I965_SURFACE_2D;
868     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
869     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
870     ss->ss2.width = orig_w / 4 - 1;
871     ss->ss2.height = orig_h / 2 - 1;
872     ss->ss3.pitch = w - 1;
873     pp_set_surface_tiling(ss, tiling);
874     dri_bo_emit_reloc(bo,
875                       I915_GEM_DOMAIN_RENDER, 
876                       0,
877                       w * h,
878                       offsetof(struct i965_surface_state, ss1),
879                       pp_context->surfaces[index].s_bo);
880     dri_bo_unmap(bo);
881
882     /* destination Y surface index 7 */
883     index = 7;
884     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
885     dri_bo_reference(pp_context->surfaces[index].s_bo);
886     bo = dri_bo_alloc(i965->intel.bufmgr, 
887                       "surface state", 
888                       sizeof(struct i965_surface_state), 
889                       4096);
890     assert(bo);
891     pp_context->surfaces[index].ss_bo = bo;
892     dri_bo_map(bo, True);
893     assert(bo->virtual);
894     ss = bo->virtual;
895     memset(ss, 0, sizeof(*ss));
896     ss->ss0.surface_type = I965_SURFACE_2D;
897     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
898     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
899     ss->ss2.width = orig_w / 4 - 1;
900     ss->ss2.height = orig_h - 1;
901     ss->ss3.pitch = w - 1;
902     dri_bo_emit_reloc(bo,
903                       I915_GEM_DOMAIN_RENDER, 
904                       I915_GEM_DOMAIN_RENDER,
905                       0,
906                       offsetof(struct i965_surface_state, ss1),
907                       pp_context->surfaces[index].s_bo);
908     dri_bo_unmap(bo);
909
910     /* destination UV surface index 8 */
911     index = 8;
912     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
913     dri_bo_reference(pp_context->surfaces[index].s_bo);
914     bo = dri_bo_alloc(i965->intel.bufmgr, 
915                       "surface state", 
916                       sizeof(struct i965_surface_state), 
917                       4096);
918     assert(bo);
919     pp_context->surfaces[index].ss_bo = bo;
920     dri_bo_map(bo, True);
921     assert(bo->virtual);
922     ss = bo->virtual;
923     memset(ss, 0, sizeof(*ss));
924     ss->ss0.surface_type = I965_SURFACE_2D;
925     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
926     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
927     ss->ss2.width = orig_w / 4 - 1;
928     ss->ss2.height = orig_h / 2 - 1;
929     ss->ss3.pitch = w - 1;
930     dri_bo_emit_reloc(bo,
931                       I915_GEM_DOMAIN_RENDER, 
932                       I915_GEM_DOMAIN_RENDER,
933                       w * h,
934                       offsetof(struct i965_surface_state, ss1),
935                       pp_context->surfaces[index].s_bo);
936     dri_bo_unmap(bo);
937
938     /* private function & data */
939     pp_context->pp_x_steps = pp_load_save_x_steps;
940     pp_context->pp_y_steps = pp_load_save_y_steps;
941     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
942     pp_load_save_context->dest_h = h;
943     pp_load_save_context->dest_w = w;
944
945     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
946     pp_inline_parameter.grf5.number_blocks = w / 16;
947 }
948
/*
 * Horizontal step count for the scaling kernel.
 *
 * Always returns 1; private_context is not read.  Installed as
 * pp_context->pp_x_steps by pp_nv12_scaling_initialize().
 */
static int
pp_scaling_x_steps(void *private_context)
{
    return 1;
}
954
955 static int
956 pp_scaling_y_steps(void *private_context)
957 {
958     struct pp_scaling_context *pp_scaling_context = private_context;
959
960     return pp_scaling_context->dest_h / 8;
961 }
962
963 static int
964 pp_scaling_set_block_parameter(void *private_context, int x, int y)
965 {
966     float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
967     float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
968
969     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16;
970     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
971     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
972     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
973     
974     return 0;
975 }
976
977 static void
978 pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
979                            unsigned short srcw, unsigned short srch,
980                            unsigned short destw, unsigned short desth)
981 {
982     struct i965_driver_data *i965 = i965_driver_data(ctx);
983     struct i965_post_processing_context *pp_context = i965->pp_context;
984     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
985     struct object_surface *obj_surface;
986     struct i965_sampler_state *sampler_state;
987     struct i965_surface_state *ss;
988     dri_bo *bo;
989     int index;
990     int w, h;
991     int orig_w, orig_h;
992     int pp_out_w, pp_out_h;
993     int orig_pp_out_w, orig_pp_out_h;
994     unsigned int tiling, swizzle;
995
996     /* surface */
997     obj_surface = SURFACE(surface);
998     orig_w = obj_surface->orig_width;
999     orig_h = obj_surface->orig_height;
1000     w = obj_surface->width;
1001     h = obj_surface->height;
1002
1003     orig_pp_out_w = destw;
1004     orig_pp_out_h = desth;
1005     pp_out_w = ALIGN(orig_pp_out_w, 16);
1006     pp_out_h = ALIGN(orig_pp_out_h, 16);
1007     dri_bo_unreference(obj_surface->pp_out_bo);
1008     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1009                                           "intermediate surface",
1010                                           SIZE_YUV420(pp_out_w, pp_out_h),
1011                                           4096);
1012     assert(obj_surface->pp_out_bo);
1013     obj_surface->orig_pp_out_width = orig_pp_out_w;
1014     obj_surface->orig_pp_out_height = orig_pp_out_h;
1015     obj_surface->pp_out_width = pp_out_w;
1016     obj_surface->pp_out_height = pp_out_h;
1017
1018     /* source Y surface index 1 */
1019     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1020
1021     index = 1;
1022     pp_context->surfaces[index].s_bo = obj_surface->bo;
1023     dri_bo_reference(pp_context->surfaces[index].s_bo);
1024     bo = dri_bo_alloc(i965->intel.bufmgr, 
1025                       "surface state", 
1026                       sizeof(struct i965_surface_state), 
1027                       4096);
1028     assert(bo);
1029     pp_context->surfaces[index].ss_bo = bo;
1030     dri_bo_map(bo, True);
1031     assert(bo->virtual);
1032     ss = bo->virtual;
1033     memset(ss, 0, sizeof(*ss));
1034     ss->ss0.surface_type = I965_SURFACE_2D;
1035     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1036     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1037     ss->ss2.width = orig_w - 1;
1038     ss->ss2.height = orig_h - 1;
1039     ss->ss3.pitch = w - 1;
1040     pp_set_surface_tiling(ss, tiling);
1041     dri_bo_emit_reloc(bo,
1042                       I915_GEM_DOMAIN_RENDER, 
1043                       0,
1044                       0,
1045                       offsetof(struct i965_surface_state, ss1),
1046                       pp_context->surfaces[index].s_bo);
1047     dri_bo_unmap(bo);
1048
1049     /* source UV surface index 2 */
1050     index = 2;
1051     pp_context->surfaces[index].s_bo = obj_surface->bo;
1052     dri_bo_reference(pp_context->surfaces[index].s_bo);
1053     bo = dri_bo_alloc(i965->intel.bufmgr, 
1054                       "surface state", 
1055                       sizeof(struct i965_surface_state), 
1056                       4096);
1057     assert(bo);
1058     pp_context->surfaces[index].ss_bo = bo;
1059     dri_bo_map(bo, True);
1060     assert(bo->virtual);
1061     ss = bo->virtual;
1062     memset(ss, 0, sizeof(*ss));
1063     ss->ss0.surface_type = I965_SURFACE_2D;
1064     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1065     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1066     ss->ss2.width = orig_w / 2 - 1;
1067     ss->ss2.height = orig_h / 2 - 1;
1068     ss->ss3.pitch = w - 1;
1069     pp_set_surface_tiling(ss, tiling);
1070     dri_bo_emit_reloc(bo,
1071                       I915_GEM_DOMAIN_RENDER, 
1072                       0,
1073                       w * h,
1074                       offsetof(struct i965_surface_state, ss1),
1075                       pp_context->surfaces[index].s_bo);
1076     dri_bo_unmap(bo);
1077
1078     /* destination Y surface index 7 */
1079     index = 7;
1080     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1081     dri_bo_reference(pp_context->surfaces[index].s_bo);
1082     bo = dri_bo_alloc(i965->intel.bufmgr, 
1083                       "surface state", 
1084                       sizeof(struct i965_surface_state), 
1085                       4096);
1086     assert(bo);
1087     pp_context->surfaces[index].ss_bo = bo;
1088     dri_bo_map(bo, True);
1089     assert(bo->virtual);
1090     ss = bo->virtual;
1091     memset(ss, 0, sizeof(*ss));
1092     ss->ss0.surface_type = I965_SURFACE_2D;
1093     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1094     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1095     ss->ss2.width = pp_out_w / 4 - 1;
1096     ss->ss2.height = pp_out_h - 1;
1097     ss->ss3.pitch = pp_out_w - 1;
1098     dri_bo_emit_reloc(bo,
1099                       I915_GEM_DOMAIN_RENDER, 
1100                       I915_GEM_DOMAIN_RENDER,
1101                       0,
1102                       offsetof(struct i965_surface_state, ss1),
1103                       pp_context->surfaces[index].s_bo);
1104     dri_bo_unmap(bo);
1105
1106     /* destination UV surface index 8 */
1107     index = 8;
1108     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1109     dri_bo_reference(pp_context->surfaces[index].s_bo);
1110     bo = dri_bo_alloc(i965->intel.bufmgr, 
1111                       "surface state", 
1112                       sizeof(struct i965_surface_state), 
1113                       4096);
1114     assert(bo);
1115     pp_context->surfaces[index].ss_bo = bo;
1116     dri_bo_map(bo, True);
1117     assert(bo->virtual);
1118     ss = bo->virtual;
1119     memset(ss, 0, sizeof(*ss));
1120     ss->ss0.surface_type = I965_SURFACE_2D;
1121     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1122     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
1123     ss->ss2.width = pp_out_w / 4 - 1;
1124     ss->ss2.height = pp_out_h / 2 - 1;
1125     ss->ss3.pitch = pp_out_w - 1;
1126     dri_bo_emit_reloc(bo,
1127                       I915_GEM_DOMAIN_RENDER, 
1128                       I915_GEM_DOMAIN_RENDER,
1129                       pp_out_w * pp_out_h,
1130                       offsetof(struct i965_surface_state, ss1),
1131                       pp_context->surfaces[index].s_bo);
1132     dri_bo_unmap(bo);
1133
1134     /* sampler state */
1135     dri_bo_map(pp_context->sampler_state_table.bo, True);
1136     assert(pp_context->sampler_state_table.bo->virtual);
1137     sampler_state = pp_context->sampler_state_table.bo->virtual;
1138
1139     /* SIMD16 Y index 1 */
1140     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1141     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1142     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1143     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1144     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1145
1146     /* SIMD16 UV index 2 */
1147     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1148     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1149     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1150     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1151     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1152
1153     dri_bo_unmap(pp_context->sampler_state_table.bo);
1154
1155     /* private function & data */
1156     pp_context->pp_x_steps = pp_scaling_x_steps;
1157     pp_context->pp_y_steps = pp_scaling_y_steps;
1158     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1159
1160     pp_scaling_context->dest_w = pp_out_w;
1161     pp_scaling_context->dest_h = pp_out_h;
1162
1163     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
1164     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
1165     pp_inline_parameter.grf5.block_count_x = pp_out_w / 16;   /* 1 x N */
1166     pp_inline_parameter.grf5.number_blocks = pp_out_w / 16;
1167     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1168     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1169 }
1170
1171 static int
1172 pp_avs_x_steps(void *private_context)
1173 {
1174     struct pp_avs_context *pp_avs_context = private_context;
1175
1176     return pp_avs_context->dest_w / 16;
1177 }
1178
/*
 * Vertical step count for the AVS kernel.
 *
 * Always returns 1; private_context is not read.  Installed as
 * pp_context->pp_y_steps by pp_nv12_avs_initialize().
 */
static int
pp_avs_y_steps(void *private_context)
{
    return 1;
}
1184
1185 static int
1186 pp_avs_set_block_parameter(void *private_context, int x, int y)
1187 {
1188     struct pp_avs_context *pp_avs_context = private_context;
1189     float src_x_steping, src_y_steping, video_step_delta;
1190     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1191
1192     if (tmp_w >= pp_avs_context->dest_w) {
1193         pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1194         pp_inline_parameter.grf6.video_step_delta = 0;
1195         
1196         if (x == 0) {
1197             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2;
1198         } else {
1199             src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1200             video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1201             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1202                 16 * 15 * video_step_delta / 2;
1203         }
1204     } else {
1205         int n0, n1, n2, nls_left, nls_right;
1206         int factor_a = 5, factor_b = 4;
1207         float f;
1208
1209         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1210         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1211         n2 = tmp_w / (16 * factor_a);
1212         nls_left = n0 + n2;
1213         nls_right = n1 + n2;
1214         f = (float) n2 * 16 / tmp_w;
1215         
1216         if (n0 < 5) {
1217             pp_inline_parameter.grf6.video_step_delta = 0.0;
1218
1219             if (x == 0) {
1220                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1221                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1222             } else {
1223                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1224                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1225                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1226                     16 * 15 * video_step_delta / 2;
1227             }
1228         } else {
1229             if (x < nls_left) {
1230                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1231                 float a = f / (nls_left * 16 * factor_b);
1232                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1233                 
1234                 pp_inline_parameter.grf6.video_step_delta = b;
1235
1236                 if (x == 0) {
1237                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1238                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
1239                 } else {
1240                     src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1241                     video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1242                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1243                         16 * 15 * video_step_delta / 2;
1244                     pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
1245                 }
1246             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1247                 /* scale the center linearly */
1248                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1249                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1250                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1251                     16 * 15 * video_step_delta / 2;
1252                 pp_inline_parameter.grf6.video_step_delta = 0.0;
1253                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1254             } else {
1255                 float a = f / (nls_right * 16 * factor_b);
1256                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1257
1258                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1259                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1260                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1261                     16 * 15 * video_step_delta / 2;
1262                 pp_inline_parameter.grf6.video_step_delta = -b;
1263
1264                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1265                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1266                 else
1267                     pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
1268             }
1269         }
1270     }
1271
1272     src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
1273     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
1274     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1275     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
1276
1277     return 0;
1278 }
1279
1280 static void
1281 pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1282                        unsigned short srcw, unsigned short srch,
1283                        unsigned short destw, unsigned short desth)
1284 {
1285     struct i965_driver_data *i965 = i965_driver_data(ctx);
1286     struct i965_post_processing_context *pp_context = i965->pp_context;
1287     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1288     struct object_surface *obj_surface;
1289     struct i965_surface_state *ss;
1290     struct i965_sampler_8x8 *sampler_8x8;
1291     struct i965_sampler_8x8_state *sampler_8x8_state;
1292     struct i965_surface_state2 *ss_8x8;
1293     dri_bo *bo, *src_bo;
1294     int index;
1295     int w, h;
1296     int orig_w, orig_h;
1297     int pp_out_w, pp_out_h;
1298     int orig_pp_out_w, orig_pp_out_h;
1299     unsigned int tiling, swizzle;
1300
1301     /* surface */
1302     obj_surface = SURFACE(surface);
1303     
1304     if (input == 1) {
1305         orig_w = obj_surface->orig_pp_out_width;
1306         orig_h = obj_surface->orig_pp_out_height;
1307         w = obj_surface->pp_out_width;
1308         h = obj_surface->pp_out_height;
1309         src_bo = obj_surface->pp_out_bo;
1310     } else {
1311         orig_w = obj_surface->orig_width;
1312         orig_h = obj_surface->orig_height;
1313         w = obj_surface->width;
1314         h = obj_surface->height;
1315         src_bo = obj_surface->bo;
1316     }
1317
1318     assert(src_bo);
1319     dri_bo_get_tiling(src_bo, &tiling, &swizzle);
1320
1321     /* source Y surface index 1 */
1322     index = 1;
1323     pp_context->surfaces[index].s_bo = src_bo;
1324     dri_bo_reference(pp_context->surfaces[index].s_bo);
1325     bo = dri_bo_alloc(i965->intel.bufmgr, 
1326                       "Y surface state for sample_8x8", 
1327                       sizeof(struct i965_surface_state2), 
1328                       4096);
1329     assert(bo);
1330     pp_context->surfaces[index].ss_bo = bo;
1331     dri_bo_map(bo, True);
1332     assert(bo->virtual);
1333     ss_8x8 = bo->virtual;
1334     memset(ss_8x8, 0, sizeof(*ss_8x8));
1335     ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1336     ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1337     ss_8x8->ss1.width = orig_w - 1;
1338     ss_8x8->ss1.height = orig_h - 1;
1339     ss_8x8->ss2.half_pitch_for_chroma = 0;
1340     ss_8x8->ss2.pitch = w - 1;
1341     ss_8x8->ss2.interleave_chroma = 0;
1342     ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM;
1343     ss_8x8->ss3.x_offset_for_cb = 0;
1344     ss_8x8->ss3.y_offset_for_cb = 0;
1345     pp_set_surface2_tiling(ss_8x8, tiling);
1346     dri_bo_emit_reloc(bo,
1347                       I915_GEM_DOMAIN_RENDER, 
1348                       0,
1349                       0,
1350                       offsetof(struct i965_surface_state2, ss0),
1351                       pp_context->surfaces[index].s_bo);
1352     dri_bo_unmap(bo);
1353
1354     /* source UV surface index 2 */
1355     index = 2;
1356     pp_context->surfaces[index].s_bo = src_bo;
1357     dri_bo_reference(pp_context->surfaces[index].s_bo);
1358     bo = dri_bo_alloc(i965->intel.bufmgr, 
1359                       "UV surface state for sample_8x8", 
1360                       sizeof(struct i965_surface_state2), 
1361                       4096);
1362     assert(bo);
1363     pp_context->surfaces[index].ss_bo = bo;
1364     dri_bo_map(bo, True);
1365     assert(bo->virtual);
1366     ss_8x8 = bo->virtual;
1367     memset(ss_8x8, 0, sizeof(*ss_8x8));
1368     ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + w * h;
1369     ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1370     ss_8x8->ss1.width = orig_w - 1;
1371     ss_8x8->ss1.height = orig_h - 1;
1372     ss_8x8->ss2.half_pitch_for_chroma = 0;
1373     ss_8x8->ss2.pitch = w - 1;
1374     ss_8x8->ss2.interleave_chroma = 1;
1375     ss_8x8->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1376     ss_8x8->ss3.x_offset_for_cb = 0;
1377     ss_8x8->ss3.y_offset_for_cb = 0;
1378     pp_set_surface2_tiling(ss_8x8, tiling);
1379     dri_bo_emit_reloc(bo,
1380                       I915_GEM_DOMAIN_RENDER, 
1381                       0,
1382                       w * h,
1383                       offsetof(struct i965_surface_state2, ss0),
1384                       pp_context->surfaces[index].s_bo);
1385     dri_bo_unmap(bo);
1386
1387     orig_pp_out_w = destw;
1388     orig_pp_out_h = desth;
1389     pp_out_w = ALIGN(orig_pp_out_w, 16);
1390     pp_out_h = ALIGN(orig_pp_out_h, 16);
1391     dri_bo_unreference(obj_surface->pp_out_bo);
1392     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1393                                           "intermediate surface",
1394                                           SIZE_YUV420(pp_out_w, pp_out_h),
1395                                           4096);
1396     assert(obj_surface->pp_out_bo);
1397     obj_surface->orig_pp_out_width = orig_pp_out_w;
1398     obj_surface->orig_pp_out_height = orig_pp_out_h;
1399     obj_surface->pp_out_width = pp_out_w;
1400     obj_surface->pp_out_height = pp_out_h;
1401
1402     /* destination Y surface index 7 */
1403     index = 7;
1404     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1405     dri_bo_reference(pp_context->surfaces[index].s_bo);
1406     bo = dri_bo_alloc(i965->intel.bufmgr, 
1407                       "surface state", 
1408                       sizeof(struct i965_surface_state), 
1409                       4096);
1410     assert(bo);
1411     pp_context->surfaces[index].ss_bo = bo;
1412     dri_bo_map(bo, True);
1413     assert(bo->virtual);
1414     ss = bo->virtual;
1415     memset(ss, 0, sizeof(*ss));
1416     ss->ss0.surface_type = I965_SURFACE_2D;
1417     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1418     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1419     ss->ss2.width = pp_out_w / 4 - 1;
1420     ss->ss2.height = pp_out_h - 1;
1421     ss->ss3.pitch = pp_out_w - 1;
1422     dri_bo_emit_reloc(bo,
1423                       I915_GEM_DOMAIN_RENDER, 
1424                       I915_GEM_DOMAIN_RENDER,
1425                       0,
1426                       offsetof(struct i965_surface_state, ss1),
1427                       pp_context->surfaces[index].s_bo);
1428     dri_bo_unmap(bo);
1429
1430     /* destination UV surface index 8 */
1431     index = 8;
1432     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1433     dri_bo_reference(pp_context->surfaces[index].s_bo);
1434     bo = dri_bo_alloc(i965->intel.bufmgr, 
1435                       "surface state", 
1436                       sizeof(struct i965_surface_state), 
1437                       4096);
1438     assert(bo);
1439     pp_context->surfaces[index].ss_bo = bo;
1440     dri_bo_map(bo, True);
1441     assert(bo->virtual);
1442     ss = bo->virtual;
1443     memset(ss, 0, sizeof(*ss));
1444     ss->ss0.surface_type = I965_SURFACE_2D;
1445     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1446     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
1447     ss->ss2.width = pp_out_w / 4 - 1;
1448     ss->ss2.height = pp_out_h / 2 - 1;
1449     ss->ss3.pitch = pp_out_w - 1;
1450     dri_bo_emit_reloc(bo,
1451                       I915_GEM_DOMAIN_RENDER, 
1452                       I915_GEM_DOMAIN_RENDER,
1453                       pp_out_w * pp_out_h,
1454                       offsetof(struct i965_surface_state, ss1),
1455                       pp_context->surfaces[index].s_bo);
1456     dri_bo_unmap(bo);
1457     
1458     /* sampler 8x8 state */
1459     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1460     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1461     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1462     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1463     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1464     sampler_8x8_state->dw136.default_sharpness_level = 0;
1465     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1466     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1467     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1468     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1469
1470     /* sampler 8x8 */
1471     dri_bo_map(pp_context->sampler_state_table.bo, True);
1472     assert(pp_context->sampler_state_table.bo->virtual);
1473     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1474     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1475
1476     /* sample_8x8 Y index 1 */
1477     index = 1;
1478     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1479     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1480     sampler_8x8[index].dw0.ief_bypass = 0;
1481     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1482     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1483     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1484     sampler_8x8[index].dw2.global_noise_estimation = 22;
1485     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1486     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1487     sampler_8x8[index].dw3.strong_edge_weight = 7;
1488     sampler_8x8[index].dw3.regular_weight = 2;
1489     sampler_8x8[index].dw3.non_edge_weight = 0;
1490     sampler_8x8[index].dw3.gain_factor = 40;
1491     sampler_8x8[index].dw4.steepness_boost = 0;
1492     sampler_8x8[index].dw4.steepness_threshold = 0;
1493     sampler_8x8[index].dw4.mr_boost = 0;
1494     sampler_8x8[index].dw4.mr_threshold = 5;
1495     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1496     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1497     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1498     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1499     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1500     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1501     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1502     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1503     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1504     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1505     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1506     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1507     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1508     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1509     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1510     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1511     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1512     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1513     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1514     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1515     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1516     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1517     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1518     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1519     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1520     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1521     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1522     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1523     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1524     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1525     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1526     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1527     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1528     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1529     sampler_8x8[index].dw13.limiter_boost = 0;
1530     sampler_8x8[index].dw13.minimum_limiter = 10;
1531     sampler_8x8[index].dw13.maximum_limiter = 11;
1532     sampler_8x8[index].dw14.clip_limiter = 130;
1533     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1534                       I915_GEM_DOMAIN_RENDER, 
1535                       0,
1536                       0,
1537                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1538                       pp_context->sampler_state_table.bo_8x8);
1539
1540     dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
1541     assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
1542     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1543     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
1544     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1545     sampler_8x8_state->dw136.default_sharpness_level = 0;
1546     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
1547     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1548     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1549     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
1550
1551     /* sample_8x8 UV index 2 */
1552     index = 2;
1553     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1554     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
1555     sampler_8x8[index].dw0.ief_bypass = 0;
1556     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1557     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1558     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
1559     sampler_8x8[index].dw2.global_noise_estimation = 22;
1560     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1561     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1562     sampler_8x8[index].dw3.strong_edge_weight = 7;
1563     sampler_8x8[index].dw3.regular_weight = 2;
1564     sampler_8x8[index].dw3.non_edge_weight = 0;
1565     sampler_8x8[index].dw3.gain_factor = 40;
1566     sampler_8x8[index].dw4.steepness_boost = 0;
1567     sampler_8x8[index].dw4.steepness_threshold = 0;
1568     sampler_8x8[index].dw4.mr_boost = 0;
1569     sampler_8x8[index].dw4.mr_threshold = 5;
1570     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1571     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1572     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1573     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1574     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1575     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1576     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1577     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1578     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1579     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1580     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1581     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1582     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1583     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1584     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1585     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1586     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1587     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1588     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1589     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1590     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1591     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1592     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1593     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1594     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1595     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1596     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1597     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1598     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1599     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1600     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1601     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1602     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1603     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1604     sampler_8x8[index].dw13.limiter_boost = 0;
1605     sampler_8x8[index].dw13.minimum_limiter = 10;
1606     sampler_8x8[index].dw13.maximum_limiter = 11;
1607     sampler_8x8[index].dw14.clip_limiter = 130;
1608     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1609                       I915_GEM_DOMAIN_RENDER, 
1610                       0,
1611                       0,
1612                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1613                       pp_context->sampler_state_table.bo_8x8_uv);
1614
1615     dri_bo_unmap(pp_context->sampler_state_table.bo);
1616
1617     /* private function & data */
1618     pp_context->pp_x_steps = pp_avs_x_steps;
1619     pp_context->pp_y_steps = pp_avs_y_steps;
1620     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1621
1622     pp_avs_context->dest_w = pp_out_w;
1623     pp_avs_context->dest_h = pp_out_h;
1624     pp_avs_context->src_w = w;
1625     pp_avs_context->src_h = h;
1626
1627     pp_static_parameter.grf4.r4_2.avs.nlas = 1;
1628     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
1629     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
1630     pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
1631     pp_inline_parameter.grf5.number_blocks = pp_out_h / 8;
1632     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1633     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1634     pp_inline_parameter.grf6.video_step_delta = 0.0;
1635 }
1636
/*
 * Horizontal step count for the DNDI walker.
 *
 * The private context is ignored; the result is always 1.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void) private_context;     /* not consulted */

    return 1;
}
1642
1643 static int
1644 pp_dndi_y_steps(void *private_context)
1645 {
1646     struct pp_dndi_context *pp_dndi_context = private_context;
1647
1648     return pp_dndi_context->dest_h / 4;
1649 }
1650
1651 static int
1652 pp_dndi_set_block_parameter(void *private_context, int x, int y)
1653 {
1654     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1655     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1656
1657     return 0;
1658 }
1659
1660 static 
1661 void pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1662                              unsigned short srcw, unsigned short srch,
1663                              unsigned short destw, unsigned short desth)
1664 {
1665     struct i965_driver_data *i965 = i965_driver_data(ctx);
1666     struct i965_post_processing_context *pp_context = i965->pp_context;
1667     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1668     struct object_surface *obj_surface;
1669     struct i965_surface_state *ss;
1670     struct i965_surface_state2 *ss_dndi;
1671     struct i965_sampler_dndi *sampler_dndi;
1672     dri_bo *bo;
1673     int index;
1674     int w, h;
1675     int orig_w, orig_h;
1676     unsigned int tiling, swizzle;
1677
1678     /* surface */
1679     obj_surface = SURFACE(surface);
1680     orig_w = obj_surface->orig_width;
1681     orig_h = obj_surface->orig_height;
1682     w = obj_surface->width;
1683     h = obj_surface->height;
1684
1685     if (pp_context->stmm.bo == NULL) {
1686         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1687                                            "STMM surface",
1688                                            w * h,
1689                                            4096);
1690         assert(pp_context->stmm.bo);
1691     }
1692
1693     dri_bo_unreference(obj_surface->pp_out_bo);
1694     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1695                                           "intermediate surface",
1696                                           SIZE_YUV420(w, h),
1697                                           4096);
1698     assert(obj_surface->pp_out_bo);
1699     obj_surface->orig_pp_out_width = orig_w;
1700     obj_surface->orig_pp_out_height = orig_h;
1701     obj_surface->pp_out_width = w;
1702     obj_surface->pp_out_height = h;
1703
1704     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1705     /* source UV surface index 2 */
1706     index = 2;
1707     pp_context->surfaces[index].s_bo = obj_surface->bo;
1708     dri_bo_reference(pp_context->surfaces[index].s_bo);
1709     bo = dri_bo_alloc(i965->intel.bufmgr, 
1710                       "surface state", 
1711                       sizeof(struct i965_surface_state), 
1712                       4096);
1713     assert(bo);
1714     pp_context->surfaces[index].ss_bo = bo;
1715     dri_bo_map(bo, True);
1716     assert(bo->virtual);
1717     ss = bo->virtual;
1718     memset(ss, 0, sizeof(*ss));
1719     ss->ss0.surface_type = I965_SURFACE_2D;
1720     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1721     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1722     ss->ss2.width = orig_w / 4 - 1;
1723     ss->ss2.height = orig_h / 2 - 1;
1724     ss->ss3.pitch = w - 1;
1725     pp_set_surface_tiling(ss, tiling);
1726     dri_bo_emit_reloc(bo,
1727                       I915_GEM_DOMAIN_RENDER, 
1728                       0,
1729                       w * h,
1730                       offsetof(struct i965_surface_state, ss1),
1731                       pp_context->surfaces[index].s_bo);
1732     dri_bo_unmap(bo);
1733
1734     /* source YUV surface index 4 */
1735     index = 4;
1736     pp_context->surfaces[index].s_bo = obj_surface->bo;
1737     dri_bo_reference(pp_context->surfaces[index].s_bo);
1738     bo = dri_bo_alloc(i965->intel.bufmgr, 
1739                       "YUV surface state for deinterlace ", 
1740                       sizeof(struct i965_surface_state2), 
1741                       4096);
1742     assert(bo);
1743     pp_context->surfaces[index].ss_bo = bo;
1744     dri_bo_map(bo, True);
1745     assert(bo->virtual);
1746     ss_dndi = bo->virtual;
1747     memset(ss_dndi, 0, sizeof(*ss_dndi));
1748     ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1749     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 0;
1750     ss_dndi->ss1.width = w - 1;
1751     ss_dndi->ss1.height = h - 1;
1752     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1;
1753     ss_dndi->ss2.half_pitch_for_chroma = 0;
1754     ss_dndi->ss2.pitch = w - 1;
1755     ss_dndi->ss2.interleave_chroma = 1;
1756     ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1757     ss_dndi->ss2.half_pitch_for_chroma = 0;
1758     ss_dndi->ss2.tiled_surface = 0;
1759     ss_dndi->ss3.x_offset_for_cb = 0;
1760     ss_dndi->ss3.y_offset_for_cb = h;
1761     pp_set_surface2_tiling(ss_dndi, tiling);
1762     dri_bo_emit_reloc(bo,
1763                       I915_GEM_DOMAIN_RENDER, 
1764                       0,
1765                       0,
1766                       offsetof(struct i965_surface_state2, ss0),
1767                       pp_context->surfaces[index].s_bo);
1768     dri_bo_unmap(bo);
1769
1770     /* source STMM surface index 20 */
1771     index = 20;
1772     pp_context->surfaces[index].s_bo = pp_context->stmm.bo;
1773     dri_bo_reference(pp_context->surfaces[index].s_bo);
1774     bo = dri_bo_alloc(i965->intel.bufmgr, 
1775                       "STMM surface state for deinterlace ", 
1776                       sizeof(struct i965_surface_state2), 
1777                       4096);
1778     assert(bo);
1779     pp_context->surfaces[index].ss_bo = bo;
1780     dri_bo_map(bo, True);
1781     assert(bo->virtual);
1782     ss = bo->virtual;
1783     memset(ss, 0, sizeof(*ss));
1784     ss->ss0.surface_type = I965_SURFACE_2D;
1785     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1786     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1787     ss->ss2.width = w - 1;
1788     ss->ss2.height = h - 1;
1789     ss->ss3.pitch = w - 1;
1790     dri_bo_emit_reloc(bo,
1791                       I915_GEM_DOMAIN_RENDER, 
1792                       I915_GEM_DOMAIN_RENDER,
1793                       0,
1794                       offsetof(struct i965_surface_state, ss1),
1795                       pp_context->surfaces[index].s_bo);
1796     dri_bo_unmap(bo);
1797
1798     /* destination Y surface index 7 */
1799     index = 7;
1800     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1801     dri_bo_reference(pp_context->surfaces[index].s_bo);
1802     bo = dri_bo_alloc(i965->intel.bufmgr, 
1803                       "surface state", 
1804                       sizeof(struct i965_surface_state), 
1805                       4096);
1806     assert(bo);
1807     pp_context->surfaces[index].ss_bo = bo;
1808     dri_bo_map(bo, True);
1809     assert(bo->virtual);
1810     ss = bo->virtual;
1811     memset(ss, 0, sizeof(*ss));
1812     ss->ss0.surface_type = I965_SURFACE_2D;
1813     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1814     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1815     ss->ss2.width = w / 4 - 1;
1816     ss->ss2.height = h - 1;
1817     ss->ss3.pitch = w - 1;
1818     dri_bo_emit_reloc(bo,
1819                       I915_GEM_DOMAIN_RENDER, 
1820                       I915_GEM_DOMAIN_RENDER,
1821                       0,
1822                       offsetof(struct i965_surface_state, ss1),
1823                       pp_context->surfaces[index].s_bo);
1824     dri_bo_unmap(bo);
1825
1826     /* destination UV surface index 8 */
1827     index = 8;
1828     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1829     dri_bo_reference(pp_context->surfaces[index].s_bo);
1830     bo = dri_bo_alloc(i965->intel.bufmgr, 
1831                       "surface state", 
1832                       sizeof(struct i965_surface_state), 
1833                       4096);
1834     assert(bo);
1835     pp_context->surfaces[index].ss_bo = bo;
1836     dri_bo_map(bo, True);
1837     assert(bo->virtual);
1838     ss = bo->virtual;
1839     memset(ss, 0, sizeof(*ss));
1840     ss->ss0.surface_type = I965_SURFACE_2D;
1841     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1842     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1843     ss->ss2.width = w / 4 - 1;
1844     ss->ss2.height = h / 2 - 1;
1845     ss->ss3.pitch = w - 1;
1846     dri_bo_emit_reloc(bo,
1847                       I915_GEM_DOMAIN_RENDER, 
1848                       I915_GEM_DOMAIN_RENDER,
1849                       w * h,
1850                       offsetof(struct i965_surface_state, ss1),
1851                       pp_context->surfaces[index].s_bo);
1852     dri_bo_unmap(bo);
1853
1854     /* sampler dndi */
1855     dri_bo_map(pp_context->sampler_state_table.bo, True);
1856     assert(pp_context->sampler_state_table.bo->virtual);
1857     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1858     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1859
1860     /* sample dndi index 1 */
1861     index = 0;
1862     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1863     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1864     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1865     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1866
1867     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1868     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1869     sampler_dndi[index].dw1.stmm_c2 = 0;
1870     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1871     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1872
1873     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
1874     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1875     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1876     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1877
1878     sampler_dndi[index].dw3.maximum_stmm = 128;
1879     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1880     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1881     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1882     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1883
1884     sampler_dndi[index].dw4.sdi_delta = 8;
1885     sampler_dndi[index].dw4.sdi_threshold = 128;
1886     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1887     sampler_dndi[index].dw4.stmm_shift_up = 0;
1888     sampler_dndi[index].dw4.stmm_shift_down = 0;
1889     sampler_dndi[index].dw4.minimum_stmm = 0;
1890
1891     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1892     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1893     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1894     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1895
1896     sampler_dndi[index].dw6.dn_enable = 1;
1897     sampler_dndi[index].dw6.di_enable = 1;
1898     sampler_dndi[index].dw6.di_partial = 0;
1899     sampler_dndi[index].dw6.dndi_top_first = 1;
1900     sampler_dndi[index].dw6.dndi_stream_id = 1;
1901     sampler_dndi[index].dw6.dndi_first_frame = 1;
1902     sampler_dndi[index].dw6.progressive_dn = 0;
1903     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1904     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1905     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1906
1907     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1908     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1909     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1910     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1911
1912     dri_bo_unmap(pp_context->sampler_state_table.bo);
1913
1914     /* private function & data */
1915     pp_context->pp_x_steps = pp_dndi_x_steps;
1916     pp_context->pp_y_steps = pp_dndi_y_steps;
1917     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1918
1919     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1920     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1921     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1922     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1923
1924     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1925     pp_inline_parameter.grf5.number_blocks = w / 16;
1926     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1927     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1928
1929     pp_dndi_context->dest_w = w;
1930     pp_dndi_context->dest_h = h;
1931 }
1932
1933 static void
1934 ironlake_pp_initialize(VADriverContextP ctx,
1935                        VASurfaceID surface,
1936                        int input,
1937                        short srcx,
1938                        short srcy,
1939                        unsigned short srcw,
1940                        unsigned short srch,
1941                        short destx,
1942                        short desty,
1943                        unsigned short destw,
1944                        unsigned short desth,
1945                        int pp_index)
1946 {
1947     struct i965_driver_data *i965 = i965_driver_data(ctx);
1948     struct i965_post_processing_context *pp_context = i965->pp_context;
1949     struct pp_module *pp_module;
1950     dri_bo *bo;
1951     int i;
1952
1953     dri_bo_unreference(pp_context->curbe.bo);
1954     bo = dri_bo_alloc(i965->intel.bufmgr,
1955                       "constant buffer",
1956                       4096, 
1957                       4096);
1958     assert(bo);
1959     pp_context->curbe.bo = bo;
1960
1961     dri_bo_unreference(pp_context->binding_table.bo);
1962     bo = dri_bo_alloc(i965->intel.bufmgr, 
1963                       "binding table",
1964                       sizeof(unsigned int), 
1965                       4096);
1966     assert(bo);
1967     pp_context->binding_table.bo = bo;
1968
1969     dri_bo_unreference(pp_context->idrt.bo);
1970     bo = dri_bo_alloc(i965->intel.bufmgr, 
1971                       "interface discriptor", 
1972                       sizeof(struct i965_interface_descriptor), 
1973                       4096);
1974     assert(bo);
1975     pp_context->idrt.bo = bo;
1976     pp_context->idrt.num_interface_descriptors = 0;
1977
1978     dri_bo_unreference(pp_context->sampler_state_table.bo);
1979     bo = dri_bo_alloc(i965->intel.bufmgr, 
1980                       "sampler state table", 
1981                       4096,
1982                       4096);
1983     assert(bo);
1984     dri_bo_map(bo, True);
1985     memset(bo->virtual, 0, bo->size);
1986     dri_bo_unmap(bo);
1987     pp_context->sampler_state_table.bo = bo;
1988
1989     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1990     bo = dri_bo_alloc(i965->intel.bufmgr, 
1991                       "sampler 8x8 state ",
1992                       4096,
1993                       4096);
1994     assert(bo);
1995     pp_context->sampler_state_table.bo_8x8 = bo;
1996
1997     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1998     bo = dri_bo_alloc(i965->intel.bufmgr, 
1999                       "sampler 8x8 state ",
2000                       4096,
2001                       4096);
2002     assert(bo);
2003     pp_context->sampler_state_table.bo_8x8_uv = bo;
2004
2005     dri_bo_unreference(pp_context->vfe_state.bo);
2006     bo = dri_bo_alloc(i965->intel.bufmgr, 
2007                       "vfe state", 
2008                       sizeof(struct i965_vfe_state), 
2009                       4096);
2010     assert(bo);
2011     pp_context->vfe_state.bo = bo;
2012     
2013     for (i = 0; i < MAX_PP_SURFACES; i++) {
2014         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2015         pp_context->surfaces[i].ss_bo = NULL;
2016
2017         dri_bo_unreference(pp_context->surfaces[i].s_bo);
2018         pp_context->surfaces[i].s_bo = NULL;
2019     }
2020
2021     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
2022     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
2023     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
2024     pp_context->current_pp = pp_index;
2025     pp_module = &pp_context->pp_modules[pp_index];
2026     
2027     if (pp_module->initialize)
2028         pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
2029 }
2030
2031 static void
2032 ironlake_post_processing(VADriverContextP ctx,
2033                          VASurfaceID surface,
2034                          int input,
2035                          short srcx,
2036                          short srcy,
2037                          unsigned short srcw,
2038                          unsigned short srch,
2039                          short destx,
2040                          short desty,
2041                          unsigned short destw,
2042                          unsigned short desth,
2043                          int pp_index)
2044 {
2045     ironlake_pp_initialize(ctx, surface, input,
2046                            srcx, srcy, srcw, srch,
2047                            destx, desty, destw, desth,
2048                            pp_index);
2049     ironlake_pp_states_setup(ctx);
2050     ironlake_pp_pipeline_setup(ctx);
2051 }
2052
2053 static void
2054 gen6_pp_initialize(VADriverContextP ctx,
2055                    VASurfaceID surface,
2056                    int input,
2057                    short srcx,
2058                    short srcy,
2059                    unsigned short srcw,
2060                    unsigned short srch,
2061                    short destx,
2062                    short desty,
2063                    unsigned short destw,
2064                    unsigned short desth,
2065                    int pp_index)
2066 {
2067     struct i965_driver_data *i965 = i965_driver_data(ctx);
2068     struct i965_post_processing_context *pp_context = i965->pp_context;
2069     struct pp_module *pp_module;
2070     dri_bo *bo;
2071     int i;
2072
2073     dri_bo_unreference(pp_context->curbe.bo);
2074     bo = dri_bo_alloc(i965->intel.bufmgr,
2075                       "constant buffer",
2076                       4096, 
2077                       4096);
2078     assert(bo);
2079     pp_context->curbe.bo = bo;
2080
2081     dri_bo_unreference(pp_context->binding_table.bo);
2082     bo = dri_bo_alloc(i965->intel.bufmgr, 
2083                       "binding table",
2084                       sizeof(unsigned int), 
2085                       4096);
2086     assert(bo);
2087     pp_context->binding_table.bo = bo;
2088
2089     dri_bo_unreference(pp_context->idrt.bo);
2090     bo = dri_bo_alloc(i965->intel.bufmgr, 
2091                       "interface discriptor", 
2092                       sizeof(struct gen6_interface_descriptor_data), 
2093                       4096);
2094     assert(bo);
2095     pp_context->idrt.bo = bo;
2096     pp_context->idrt.num_interface_descriptors = 0;
2097
2098     dri_bo_unreference(pp_context->sampler_state_table.bo);
2099     bo = dri_bo_alloc(i965->intel.bufmgr, 
2100                       "sampler state table", 
2101                       4096,
2102                       4096);
2103     assert(bo);
2104     dri_bo_map(bo, True);
2105     memset(bo->virtual, 0, bo->size);
2106     dri_bo_unmap(bo);
2107     pp_context->sampler_state_table.bo = bo;
2108
2109     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2110     bo = dri_bo_alloc(i965->intel.bufmgr, 
2111                       "sampler 8x8 state ",
2112                       4096,
2113                       4096);
2114     assert(bo);
2115     pp_context->sampler_state_table.bo_8x8 = bo;
2116
2117     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2118     bo = dri_bo_alloc(i965->intel.bufmgr, 
2119                       "sampler 8x8 state ",
2120                       4096,
2121                       4096);
2122     assert(bo);
2123     pp_context->sampler_state_table.bo_8x8_uv = bo;
2124
2125     dri_bo_unreference(pp_context->vfe_state.bo);
2126     bo = dri_bo_alloc(i965->intel.bufmgr, 
2127                       "vfe state", 
2128                       sizeof(struct i965_vfe_state), 
2129                       4096);
2130     assert(bo);
2131     pp_context->vfe_state.bo = bo;
2132     
2133     for (i = 0; i < MAX_PP_SURFACES; i++) {
2134         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2135         pp_context->surfaces[i].ss_bo = NULL;
2136
2137         dri_bo_unreference(pp_context->surfaces[i].s_bo);
2138         pp_context->surfaces[i].s_bo = NULL;
2139     }
2140
2141     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
2142     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
2143     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
2144     pp_context->current_pp = pp_index;
2145     pp_module = &pp_context->pp_modules[pp_index];
2146     
2147     if (pp_module->initialize)
2148         pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
2149 }
2150
2151 static void
2152 gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
2153 {
2154     unsigned int *binding_table;
2155     dri_bo *bo = pp_context->binding_table.bo;
2156     int i;
2157
2158     dri_bo_map(bo, 1);
2159     assert(bo->virtual);
2160     binding_table = bo->virtual;
2161     memset(binding_table, 0, bo->size);
2162
2163     for (i = 0; i < MAX_PP_SURFACES; i++) {
2164         if (pp_context->surfaces[i].ss_bo) {
2165             assert(pp_context->surfaces[i].s_bo);
2166
2167             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
2168             dri_bo_emit_reloc(bo,
2169                               I915_GEM_DOMAIN_INSTRUCTION, 0,
2170                               0,
2171                               i * sizeof(*binding_table),
2172                               pp_context->surfaces[i].ss_bo);
2173         }
2174     
2175     }
2176
2177     dri_bo_unmap(bo);
2178 }
2179
2180 static void
2181 gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
2182 {
2183     struct gen6_interface_descriptor_data *desc;
2184     dri_bo *bo;
2185     int pp_index = pp_context->current_pp;
2186
2187     bo = pp_context->idrt.bo;
2188     dri_bo_map(bo, True);
2189     assert(bo->virtual);
2190     desc = bo->virtual;
2191     memset(desc, 0, sizeof(*desc));
2192     desc->desc0.kernel_start_pointer = 
2193         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
2194     desc->desc1.single_program_flow = 1;
2195     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
2196     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
2197     desc->desc2.sampler_state_pointer = 
2198         pp_context->sampler_state_table.bo->offset >> 5;
2199     desc->desc3.binding_table_entry_count = 0;
2200     desc->desc3.binding_table_pointer = 
2201         pp_context->binding_table.bo->offset >> 5; /*reloc */
2202     desc->desc4.constant_urb_entry_read_offset = 0;
2203     desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
2204
2205     dri_bo_emit_reloc(bo,
2206                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2207                       0,
2208                       offsetof(struct gen6_interface_descriptor_data, desc0),
2209                       pp_context->pp_modules[pp_index].kernel.bo);
2210
2211     dri_bo_emit_reloc(bo,
2212                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2213                       desc->desc2.sampler_count << 2,
2214                       offsetof(struct gen6_interface_descriptor_data, desc2),
2215                       pp_context->sampler_state_table.bo);
2216
2217     dri_bo_emit_reloc(bo,
2218                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2219                       desc->desc3.binding_table_entry_count,
2220                       offsetof(struct gen6_interface_descriptor_data, desc3),
2221                       pp_context->binding_table.bo);
2222
2223     dri_bo_unmap(bo);
2224     pp_context->idrt.num_interface_descriptors++;
2225 }
2226
2227 static void
2228 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
2229 {
2230     unsigned char *constant_buffer;
2231
2232     assert(sizeof(pp_static_parameter) == 128);
2233     dri_bo_map(pp_context->curbe.bo, 1);
2234     assert(pp_context->curbe.bo->virtual);
2235     constant_buffer = pp_context->curbe.bo->virtual;
2236     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
2237     dri_bo_unmap(pp_context->curbe.bo);
2238 }
2239
2240 static void
2241 gen6_pp_states_setup(VADriverContextP ctx)
2242 {
2243     struct i965_driver_data *i965 = i965_driver_data(ctx);
2244     struct i965_post_processing_context *pp_context = i965->pp_context;
2245
2246     gen6_pp_binding_table(pp_context);
2247     gen6_pp_interface_descriptor_table(pp_context);
2248     gen6_pp_upload_constants(pp_context);
2249 }
2250
2251 static void
2252 gen6_pp_pipeline_select(VADriverContextP ctx)
2253 {
2254     BEGIN_BATCH(ctx, 1);
2255     OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
2256     ADVANCE_BATCH(ctx);
2257 }
2258
2259 static void
2260 gen6_pp_state_base_address(VADriverContextP ctx)
2261 {
2262     BEGIN_BATCH(ctx, 10);
2263     OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | (10 - 2));
2264     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2265     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2266     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2267     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2268     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2269     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2270     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2271     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2272     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2273     ADVANCE_BATCH(ctx);
2274 }
2275
2276 static void
2277 gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2278 {
2279     BEGIN_BATCH(ctx, 8);
2280     OUT_BATCH(ctx, CMD_MEDIA_VFE_STATE | (8 - 2));
2281     OUT_BATCH(ctx, 0);
2282     OUT_BATCH(ctx,
2283               (pp_context->urb.num_vfe_entries - 1) << 16 |
2284               pp_context->urb.num_vfe_entries << 8);
2285     OUT_BATCH(ctx, 0);
2286     OUT_BATCH(ctx,
2287               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
2288               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
2289     OUT_BATCH(ctx, 0);
2290     OUT_BATCH(ctx, 0);
2291     OUT_BATCH(ctx, 0);
2292     ADVANCE_BATCH(ctx);
2293 }
2294
2295 static void
2296 gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2297 {
2298     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);
2299
2300     BEGIN_BATCH(ctx, 4);
2301     OUT_BATCH(ctx, CMD_MEDIA_CURBE_LOAD | (4 - 2));
2302     OUT_BATCH(ctx, 0);
2303     OUT_BATCH(ctx,
2304               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
2305     OUT_RELOC(ctx, 
2306               pp_context->curbe.bo,
2307               I915_GEM_DOMAIN_INSTRUCTION, 0,
2308               0);
2309     ADVANCE_BATCH(ctx);
2310 }
2311
2312 static void
2313 gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2314 {
2315     BEGIN_BATCH(ctx, 4);
2316     OUT_BATCH(ctx, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
2317     OUT_BATCH(ctx, 0);
2318     OUT_BATCH(ctx,
2319               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
2320     OUT_RELOC(ctx, 
2321               pp_context->idrt.bo,
2322               I915_GEM_DOMAIN_INSTRUCTION, 0,
2323               0);
2324     ADVANCE_BATCH(ctx);
2325 }
2326
2327 static void
2328 gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2329 {
2330     int x, x_steps, y, y_steps;
2331
2332     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
2333     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
2334
2335     for (y = 0; y < y_steps; y++) {
2336         for (x = 0; x < x_steps; x++) {
2337             if (!pp_context->pp_set_block_parameter(&pp_context->private_context, x, y)) {
2338                 BEGIN_BATCH(ctx, 22);
2339                 OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 20);
2340                 OUT_BATCH(ctx, 0);
2341                 OUT_BATCH(ctx, 0); /* no indirect data */
2342                 OUT_BATCH(ctx, 0);
2343                 OUT_BATCH(ctx, 0); /* scoreboard */
2344                 OUT_BATCH(ctx, 0);
2345
2346                 /* inline data grf 5-6 */
2347                 assert(sizeof(pp_inline_parameter) == 64);
2348                 intel_batchbuffer_data(ctx, &pp_inline_parameter, sizeof(pp_inline_parameter));
2349
2350                 ADVANCE_BATCH(ctx);
2351             }
2352         }
2353     }
2354 }
2355
2356 static void
2357 gen6_pp_pipeline_setup(VADriverContextP ctx)
2358 {
2359     struct i965_driver_data *i965 = i965_driver_data(ctx);
2360     struct i965_post_processing_context *pp_context = i965->pp_context;
2361
2362     intel_batchbuffer_start_atomic(ctx, 0x1000);
2363     intel_batchbuffer_emit_mi_flush(ctx);
2364     gen6_pp_pipeline_select(ctx);
2365     gen6_pp_curbe_load(ctx, pp_context);
2366     gen6_interface_descriptor_load(ctx, pp_context);
2367     gen6_pp_state_base_address(ctx);
2368     gen6_pp_vfe_state(ctx, pp_context);
2369     gen6_pp_object_walker(ctx, pp_context);
2370     intel_batchbuffer_end_atomic(ctx);
2371 }
2372
2373 static void
2374 gen6_post_processing(VADriverContextP ctx,
2375                      VASurfaceID surface,
2376                      int input,
2377                      short srcx,
2378                      short srcy,
2379                      unsigned short srcw,
2380                      unsigned short srch,
2381                      short destx,
2382                      short desty,
2383                      unsigned short destw,
2384                      unsigned short desth,
2385                      int pp_index)
2386 {
2387     gen6_pp_initialize(ctx, surface, input,
2388                        srcx, srcy, srcw, srch,
2389                        destx, desty, destw, desth,
2390                        pp_index);
2391     gen6_pp_states_setup(ctx);
2392     gen6_pp_pipeline_setup(ctx);
2393 }
2394
2395 static void
2396 i965_post_processing_internal(VADriverContextP ctx,
2397                               VASurfaceID surface,
2398                               int input,
2399                               short srcx,
2400                               short srcy,
2401                               unsigned short srcw,
2402                               unsigned short srch,
2403                               short destx,
2404                               short desty,
2405                               unsigned short destw,
2406                               unsigned short desth,
2407                               int pp_index)
2408 {
2409     struct i965_driver_data *i965 = i965_driver_data(ctx);
2410
2411     if (IS_GEN6(i965->intel.device_id))
2412         gen6_post_processing(ctx, surface, input,
2413                              srcx, srcy, srcw, srch,
2414                              destx, desty, destw, desth,
2415                              pp_index);
2416     else
2417         ironlake_post_processing(ctx, surface, input,
2418                                  srcx, srcy, srcw, srch,
2419                                  destx, desty, destw, desth,
2420                                  pp_index);
2421 }
2422
2423 void
2424 i965_post_processing(VADriverContextP ctx,
2425                      VASurfaceID surface,
2426                      short srcx,
2427                      short srcy,
2428                      unsigned short srcw,
2429                      unsigned short srch,
2430                      short destx,
2431                      short desty,
2432                      unsigned short destw,
2433                      unsigned short desth,
2434                      unsigned int flag)
2435 {
2436     struct i965_driver_data *i965 = i965_driver_data(ctx);
2437
2438     if (HAS_PP(i965)) {
2439         /* Currently only support post processing for NV12 surface */
2440         if (i965->render_state.interleaved_uv) {
2441             int internal_input = 0;
2442
2443             if (flag & I965_PP_FLAG_DEINTERLACING) {
2444                 i965_post_processing_internal(ctx, surface, internal_input,
2445                                               srcx, srcy, srcw, srch,
2446                                               destx, desty, destw, desth,
2447                                               PP_NV12_DNDI);
2448                 internal_input = 1;
2449             }
2450
2451             if (flag & I965_PP_FLAG_AVS) {
2452                 i965_post_processing_internal(ctx, surface, internal_input,
2453                                               srcx, srcy, srcw, srch,
2454                                               destx, desty, destw, desth,
2455                                               PP_NV12_AVS);
2456             }
2457         }
2458     }
2459 }       
2460
2461 Bool
2462 i965_post_processing_terminate(VADriverContextP ctx)
2463 {
2464     struct i965_driver_data *i965 = i965_driver_data(ctx);
2465     struct i965_post_processing_context *pp_context = i965->pp_context;
2466     int i;
2467
2468     if (HAS_PP(i965)) {
2469         if (pp_context) {
2470             dri_bo_unreference(pp_context->curbe.bo);
2471             pp_context->curbe.bo = NULL;
2472
2473             for (i = 0; i < MAX_PP_SURFACES; i++) {
2474                 dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2475                 pp_context->surfaces[i].ss_bo = NULL;
2476
2477                 dri_bo_unreference(pp_context->surfaces[i].s_bo);
2478                 pp_context->surfaces[i].s_bo = NULL;
2479             }
2480
2481             dri_bo_unreference(pp_context->sampler_state_table.bo);
2482             pp_context->sampler_state_table.bo = NULL;
2483
2484             dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2485             pp_context->sampler_state_table.bo_8x8 = NULL;
2486
2487             dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2488             pp_context->sampler_state_table.bo_8x8_uv = NULL;
2489
2490             dri_bo_unreference(pp_context->binding_table.bo);
2491             pp_context->binding_table.bo = NULL;
2492
2493             dri_bo_unreference(pp_context->idrt.bo);
2494             pp_context->idrt.bo = NULL;
2495             pp_context->idrt.num_interface_descriptors = 0;
2496
2497             dri_bo_unreference(pp_context->vfe_state.bo);
2498             pp_context->vfe_state.bo = NULL;
2499
2500             dri_bo_unreference(pp_context->stmm.bo);
2501             pp_context->stmm.bo = NULL;
2502
2503             for (i = 0; i < NUM_PP_MODULES; i++) {
2504                 struct pp_module *pp_module = &pp_context->pp_modules[i];
2505
2506                 dri_bo_unreference(pp_module->kernel.bo);
2507                 pp_module->kernel.bo = NULL;
2508             }
2509
2510             free(pp_context);
2511         }
2512
2513         i965->pp_context = NULL;
2514     }
2515
2516     return True;
2517 }
2518
2519 Bool
2520 i965_post_processing_init(VADriverContextP ctx)
2521 {
2522     struct i965_driver_data *i965 = i965_driver_data(ctx);
2523     struct i965_post_processing_context *pp_context = i965->pp_context;
2524     int i;
2525
2526     if (HAS_PP(i965)) {
2527         if (pp_context == NULL) {
2528             pp_context = calloc(1, sizeof(*pp_context));
2529             i965->pp_context = pp_context;
2530
2531             pp_context->urb.size = URB_SIZE((&i965->intel));
2532             pp_context->urb.num_vfe_entries = 32;
2533             pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
2534             pp_context->urb.num_cs_entries = 1;
2535             pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
2536             pp_context->urb.vfe_start = 0;
2537             pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2538                 pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2539             assert(pp_context->urb.cs_start + 
2540                    pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2541
2542             assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
2543             assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2544
2545             if (IS_GEN6(i965->intel.device_id))
2546                 memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
2547             else if (IS_IRONLAKE(i965->intel.device_id))
2548                 memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
2549
2550             for (i = 0; i < NUM_PP_MODULES; i++) {
2551                 struct pp_module *pp_module = &pp_context->pp_modules[i];
2552                 dri_bo_unreference(pp_module->kernel.bo);
2553                 pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
2554                                                     pp_module->kernel.name,
2555                                                     pp_module->kernel.size,
2556                                                     4096);
2557                 assert(pp_module->kernel.bo);
2558                 dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
2559             }
2560         }
2561     }
2562
2563     return True;
2564 }