b6db19a1fa84499aca27a791067eb194d461782f
[platform/upstream/libva.git] / i965_drv_video / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
40
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
43 #include "i965_drv_video.h"
44
/* Non-zero when the device behind ctx is an Ironlake or a Gen6 part. */
#define HAS_PP(ctx)                                     \
    (IS_IRONLAKE((ctx)->intel.device_id) ||             \
     IS_GEN6((ctx)->intel.device_id))
47
48 struct pp_module
49 {
50     /* kernel */
51     char *name;
52     int interface;
53     unsigned int (*bin)[4];
54     int size;
55     dri_bo *bo;
56
57     /* others */
58     void (*initialize)(VADriverContextP ctx, VASurfaceID surface, int input,
59                        unsigned short srcw, unsigned short srch,
60                        unsigned short destw, unsigned short desth);
61 };
62
63 static uint32_t pp_null_gen5[][4] = {
64 #include "shaders/post_processing/null.g4b.gen5"
65 };
66
67 static uint32_t pp_nv12_load_save_gen5[][4] = {
68 #include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
69 };
70
71 static uint32_t pp_nv12_scaling_gen5[][4] = {
72 #include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
73 };
74
75 static uint32_t pp_nv12_avs_gen5[][4] = {
76 #include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
77 };
78
79 static uint32_t pp_nv12_dndi_gen5[][4] = {
80 #include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
81 };
82
83 static void pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
84                                unsigned short srcw, unsigned short srch,
85                                unsigned short destw, unsigned short desth);
86 static void pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
87                                    unsigned short srcw, unsigned short srch,
88                                    unsigned short destw, unsigned short desth);
89 static void pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
90                                        unsigned short srcw, unsigned short srch,
91                                        unsigned short destw, unsigned short desth);
92 static void pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
93                                          unsigned short srcw, unsigned short srch,
94                                          unsigned short destw, unsigned short desth);
95 static void pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
96                                     unsigned short srcw, unsigned short srch,
97                                     unsigned short destw, unsigned short desth);
98
99 static struct pp_module pp_modules_gen5[] = {
100     {
101         "NULL module (for testing)",
102         PP_NULL,
103         pp_null_gen5,
104         sizeof(pp_null_gen5),
105         NULL,
106         pp_null_initialize,
107     },
108
109     {
110         "NV12 Load & Save module",
111         PP_NV12_LOAD_SAVE,
112         pp_nv12_load_save_gen5,
113         sizeof(pp_nv12_load_save_gen5),
114         NULL,
115         pp_nv12_load_save_initialize,
116     },
117
118     {
119         "NV12 Scaling module",
120         PP_NV12_SCALING,
121         pp_nv12_scaling_gen5,
122         sizeof(pp_nv12_scaling_gen5),
123         NULL,
124         pp_nv12_scaling_initialize,
125     },
126
127     {
128         "NV12 AVS module",
129         PP_NV12_AVS,
130         pp_nv12_avs_gen5,
131         sizeof(pp_nv12_avs_gen5),
132         NULL,
133         pp_nv12_avs_initialize,
134     },
135
136     {
137         "NV12 DNDI module",
138         PP_NV12_DNDI,
139         pp_nv12_dndi_gen5,
140         sizeof(pp_nv12_dndi_gen5),
141         NULL,
142         pp_nv12_dndi_initialize,
143     },
144 };
145
146 static uint32_t pp_null_gen6[][4] = {
147 #include "shaders/post_processing/null.g6b"
148 };
149
150 static uint32_t pp_nv12_load_save_gen6[][4] = {
151 #include "shaders/post_processing/nv12_load_save_nv12.g6b"
152 };
153
154 static uint32_t pp_nv12_scaling_gen6[][4] = {
155 #include "shaders/post_processing/nv12_scaling_nv12.g6b"
156 };
157
158 static uint32_t pp_nv12_avs_gen6[][4] = {
159 #include "shaders/post_processing/nv12_avs_nv12.g6b"
160 };
161
162 static uint32_t pp_nv12_dndi_gen6[][4] = {
163 #include "shaders/post_processing/nv12_dndi_nv12.g6b"
164 };
165
166 static struct pp_module pp_modules_gen6[] = {
167     {
168         "NULL module (for testing)",
169         PP_NULL,
170         pp_null_gen6,
171         sizeof(pp_null_gen6),
172         NULL,
173         pp_null_initialize,
174     },
175
176     {
177         "NV12 Load & Save module",
178         PP_NV12_LOAD_SAVE,
179         pp_nv12_load_save_gen6,
180         sizeof(pp_nv12_load_save_gen6),
181         NULL,
182         pp_nv12_load_save_initialize,
183     },
184
185     {
186         "NV12 Scaling module",
187         PP_NV12_SCALING,
188         pp_nv12_scaling_gen6,
189         sizeof(pp_nv12_scaling_gen6),
190         NULL,
191         pp_nv12_scaling_initialize,
192     },
193
194     {
195         "NV12 AVS module",
196         PP_NV12_AVS,
197         pp_nv12_avs_gen6,
198         sizeof(pp_nv12_avs_gen6),
199         NULL,
200         pp_nv12_avs_initialize,
201     },
202
203     {
204         "NV12 DNDI module",
205         PP_NV12_DNDI,
206         pp_nv12_dndi_gen6,
207         sizeof(pp_nv12_dndi_gen6),
208         NULL,
209         pp_nv12_dndi_initialize,
210     },
211 };
212
/* Number of entries in a module table (the gen5 and gen6 tables have the same length). */
#define NUM_PP_MODULES  ARRAY_ELEMS(pp_modules_gen5)

/* Module table currently in use; NULL until one is selected. */
static struct pp_module *pp_modules = NULL;
216
/*
 * Static kernel parameters.  The four members grf1..grf4 mirror the
 * registers named in the per-field comments (rN.M = dword M of grf N);
 * each member is eight dwords, 128 bytes for the whole structure.
 */
struct pp_static_parameter
{
    struct {
        /* r1.0: Procamp */
        float procamp_constant_c0;

        /* r1.1: Load and Save */
        unsigned int source_packed_y_offset:8;
        unsigned int source_packed_u_offset:8;
        unsigned int source_packed_v_offset:8;
        unsigned int pad0:8;

        /* r1.2: shared between Load and Save and CSC */
        union {
            /* Load and Save */
            struct {
                unsigned int destination_packed_y_offset:8;
                unsigned int destination_packed_u_offset:8;
                unsigned int destination_packed_v_offset:8;
                unsigned int pad0:8;
            } load_and_save;

            /* CSC */
            struct {
                unsigned int destination_rgb_format:8;
                unsigned int pad0:24;
            } csc;
        } r1_2;

        /* r1.3: Procamp */
        float procamp_constant_c1;

        /* r1.4: Procamp */
        float procamp_constant_c2;

        /* r1.5: DI -- statistics surface pitch, divided by 2
         * (the field name keeps its historical spelling) */
        unsigned int statistics_surface_picth:16;
        unsigned int pad1:16;

        /* r1.6: shared between DI and AVS/Scaling */
        union {
            /* DI */
            struct {
                unsigned int pad0:24;
                unsigned int top_field_first:8;
            } di;

            /* AVS/Scaling */
            float normalized_video_y_scaling_step;
        } r1_6;

        /* r1.7: Procamp */
        float procamp_constant_c5;
    } grf1;

    struct {
        /* r2.0: Procamp */
        float procamp_constant_c3;

        /* r2.1: MBZ */
        unsigned int pad0;

        /* r2.2: WG+CSC */
        float wg_csc_constant_c4;

        /* r2.3: WG+CSC */
        float wg_csc_constant_c8;

        /* r2.4: Procamp */
        float procamp_constant_c4;

        /* r2.5: MBZ */
        unsigned int pad1;

        /* r2.6: MBZ */
        unsigned int pad2;

        /* r2.7: WG+CSC */
        float wg_csc_constant_c9;
    } grf2;

    struct {
        /* r3.0: WG+CSC */
        float wg_csc_constant_c0;

        /* r3.1: Blending */
        float scaling_step_ratio;

        /* r3.2: Blending */
        float normalized_alpha_y_scaling;

        /* r3.3: WG+CSC */
        float wg_csc_constant_c4;

        /* r3.4: WG+CSC */
        float wg_csc_constant_c1;

        /* r3.5: ALL */
        int horizontal_origin_offset:16;
        int vertical_origin_offset:16;

        /* r3.6: shared */
        union {
            /* Color fill */
            unsigned int color_pixel;

            /* WG+CSC */
            float wg_csc_constant_c2;
        } r3_6;

        /* r3.7: WG+CSC */
        float wg_csc_constant_c3;
    } grf3;

    struct {
        /* r4.0: WG+CSC */
        float wg_csc_constant_c6;

        /* r4.1: ALL, MBZ ??? */
        unsigned int pad0;

        /* r4.2: shared between AVS and DI */
        union {
            /* AVS */
            struct {
                unsigned int pad1:15;
                unsigned int nlas:1;
                unsigned int pad2:16;
            } avs;

            /* DI */
            struct {
                unsigned int motion_history_coefficient_m2:8;
                unsigned int motion_history_coefficient_m1:8;
                unsigned int pad0:16;
            } di;
        } r4_2;

        /* r4.3: WG+CSC */
        float wg_csc_constant_c7;

        /* r4.4: WG+CSC */
        float wg_csc_constant_c10;

        /* r4.5: AVS */
        float source_video_frame_normalized_horizontal_origin;

        /* r4.6: MBZ */
        unsigned int pad1;

        /* r4.7: WG+CSC */
        float wg_csc_constant_c11;
    } grf4;
};
369
/*
 * Per-block kernel parameters sent as inline data (grf 5-6): two members
 * of eight dwords each, 64 bytes in total.  rN.M = dword M of grf N.
 */
struct pp_inline_parameter
{
    struct {
        /* r5.0: ALL */
        int destination_block_horizontal_origin:16;
        int destination_block_vertical_origin:16;

        /* r5.1: shared between AVS/Scaling and FMD */
        union {
            /* AVS/Scaling */
            float source_surface_block_normalized_horizontal_origin;

            /* FMD */
            struct {
                unsigned int variance_surface_vertical_origin:16;
                unsigned int pad0:16;
            } fmd;
        } r5_1;

        /* r5.2: AVS/Scaling */
        float source_surface_block_normalized_vertical_origin;

        /* r5.3: Alpha */
        float alpha_surface_block_normalized_horizontal_origin;

        /* r5.4: Alpha */
        float alpha_surface_block_normalized_vertical_origin;

        /* r5.5: Alpha */
        unsigned int alpha_mask_x:16;
        unsigned int alpha_mask_y:8;
        unsigned int block_count_x:8;

        /* r5.6 */
        unsigned int block_horizontal_mask:16;
        unsigned int block_vertical_mask:8;
        unsigned int number_blocks:8;

        /* r5.7: AVS/Scaling */
        float normalized_video_x_scaling_step;
    } grf5;

    struct {
        /* r6.0: AVS */
        float video_step_delta;

        /* r6.1 - r6.7 */
        unsigned int padx[7];
    } grf6;
};
420
421 static struct pp_static_parameter pp_static_parameter;
422 static struct pp_inline_parameter pp_inline_parameter;
423
424 static void
425 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
426 {
427     switch (tiling) {
428     case I915_TILING_NONE:
429         ss->ss3.tiled_surface = 0;
430         ss->ss3.tile_walk = 0;
431         break;
432     case I915_TILING_X:
433         ss->ss3.tiled_surface = 1;
434         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
435         break;
436     case I915_TILING_Y:
437         ss->ss3.tiled_surface = 1;
438         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
439         break;
440     }
441 }
442
443 static void
444 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
445 {
446     switch (tiling) {
447     case I915_TILING_NONE:
448         ss->ss2.tiled_surface = 0;
449         ss->ss2.tile_walk = 0;
450         break;
451     case I915_TILING_X:
452         ss->ss2.tiled_surface = 1;
453         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
454         break;
455     case I915_TILING_Y:
456         ss->ss2.tiled_surface = 1;
457         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
458         break;
459     }
460 }
461
/*
 * Intentionally empty: the surface states are built by the per-module
 * initialize routines, so there is nothing left to do at this stage.
 */
static void
ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
{
    (void)pp_context;
}
467
468 static void
469 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
470 {
471     struct i965_interface_descriptor *desc;
472     dri_bo *bo;
473     int pp_index = pp_context->current_pp;
474
475     bo = pp_context->idrt.bo;
476     dri_bo_map(bo, 1);
477     assert(bo->virtual);
478     desc = bo->virtual;
479     memset(desc, 0, sizeof(*desc));
480     desc->desc0.grf_reg_blocks = 10;
481     desc->desc0.kernel_start_pointer = pp_modules[pp_index].bo->offset >> 6; /* reloc */
482     desc->desc1.const_urb_entry_read_offset = 0;
483     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
484     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
485     desc->desc2.sampler_count = 0;
486     desc->desc3.binding_table_entry_count = 0;
487     desc->desc3.binding_table_pointer = 
488         pp_context->binding_table.bo->offset >> 5; /*reloc */
489
490     dri_bo_emit_reloc(bo,
491                       I915_GEM_DOMAIN_INSTRUCTION, 0,
492                       desc->desc0.grf_reg_blocks,
493                       offsetof(struct i965_interface_descriptor, desc0),
494                       pp_modules[pp_index].bo);
495
496     dri_bo_emit_reloc(bo,
497                       I915_GEM_DOMAIN_INSTRUCTION, 0,
498                       desc->desc2.sampler_count << 2,
499                       offsetof(struct i965_interface_descriptor, desc2),
500                       pp_context->sampler_state_table.bo);
501
502     dri_bo_emit_reloc(bo,
503                       I915_GEM_DOMAIN_INSTRUCTION, 0,
504                       desc->desc3.binding_table_entry_count,
505                       offsetof(struct i965_interface_descriptor, desc3),
506                       pp_context->binding_table.bo);
507
508     dri_bo_unmap(bo);
509     pp_context->idrt.num_interface_descriptors++;
510 }
511
512 static void
513 ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
514 {
515     unsigned int *binding_table;
516     dri_bo *bo = pp_context->binding_table.bo;
517     int i;
518
519     dri_bo_map(bo, 1);
520     assert(bo->virtual);
521     binding_table = bo->virtual;
522     memset(binding_table, 0, bo->size);
523
524     for (i = 0; i < MAX_PP_SURFACES; i++) {
525         if (pp_context->surfaces[i].ss_bo) {
526             assert(pp_context->surfaces[i].s_bo);
527
528             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
529             dri_bo_emit_reloc(bo,
530                               I915_GEM_DOMAIN_INSTRUCTION, 0,
531                               0,
532                               i * sizeof(*binding_table),
533                               pp_context->surfaces[i].ss_bo);
534         }
535     
536     }
537
538     dri_bo_unmap(bo);
539 }
540
541 static void
542 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
543 {
544     struct i965_vfe_state *vfe_state;
545     dri_bo *bo;
546
547     bo = pp_context->vfe_state.bo;
548     dri_bo_map(bo, 1);
549     assert(bo->virtual);
550     vfe_state = bo->virtual;
551     memset(vfe_state, 0, sizeof(*vfe_state));
552     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
553     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
554     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
555     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
556     vfe_state->vfe1.children_present = 0;
557     vfe_state->vfe2.interface_descriptor_base = 
558         pp_context->idrt.bo->offset >> 4; /* reloc */
559     dri_bo_emit_reloc(bo,
560                       I915_GEM_DOMAIN_INSTRUCTION, 0,
561                       0,
562                       offsetof(struct i965_vfe_state, vfe2),
563                       pp_context->idrt.bo);
564     dri_bo_unmap(bo);
565 }
566
567 static void
568 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
569 {
570     unsigned char *constant_buffer;
571
572     assert(sizeof(pp_static_parameter) == 128);
573     dri_bo_map(pp_context->curbe.bo, 1);
574     assert(pp_context->curbe.bo->virtual);
575     constant_buffer = pp_context->curbe.bo->virtual;
576     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
577     dri_bo_unmap(pp_context->curbe.bo);
578 }
579
580 static void
581 ironlake_pp_states_setup(VADriverContextP ctx)
582 {
583     struct i965_driver_data *i965 = i965_driver_data(ctx);
584     struct i965_post_processing_context *pp_context = i965->pp_context;
585
586     ironlake_pp_surface_state(pp_context);
587     ironlake_pp_binding_table(pp_context);
588     ironlake_pp_interface_descriptor_table(pp_context);
589     ironlake_pp_vfe_state(pp_context);
590     ironlake_pp_upload_constants(pp_context);
591 }
592
593 static void
594 ironlake_pp_pipeline_select(VADriverContextP ctx)
595 {
596     BEGIN_BATCH(ctx, 1);
597     OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
598     ADVANCE_BATCH(ctx);
599 }
600
601 static void
602 ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
603 {
604     unsigned int vfe_fence, cs_fence;
605
606     vfe_fence = pp_context->urb.cs_start;
607     cs_fence = pp_context->urb.size;
608
609     BEGIN_BATCH(ctx, 3);
610     OUT_BATCH(ctx, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
611     OUT_BATCH(ctx, 0);
612     OUT_BATCH(ctx, 
613               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
614               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
615     ADVANCE_BATCH(ctx);
616 }
617
618 static void
619 ironlake_pp_state_base_address(VADriverContextP ctx)
620 {
621     BEGIN_BATCH(ctx, 8);
622     OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
623     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
624     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
625     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
626     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
627     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
628     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
629     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
630     ADVANCE_BATCH(ctx);
631 }
632
633 static void
634 ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
635 {
636     BEGIN_BATCH(ctx, 3);
637     OUT_BATCH(ctx, CMD_MEDIA_STATE_POINTERS | 1);
638     OUT_BATCH(ctx, 0);
639     OUT_RELOC(ctx, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
640     ADVANCE_BATCH(ctx);
641 }
642
643 static void 
644 ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
645 {
646     BEGIN_BATCH(ctx, 2);
647     OUT_BATCH(ctx, CMD_CS_URB_STATE | 0);
648     OUT_BATCH(ctx,
649               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
650               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
651     ADVANCE_BATCH(ctx);
652 }
653
654 static void
655 ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
656 {
657     BEGIN_BATCH(ctx, 2);
658     OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
659     OUT_RELOC(ctx, pp_context->curbe.bo,
660               I915_GEM_DOMAIN_INSTRUCTION, 0,
661               pp_context->urb.size_cs_entry - 1);
662     ADVANCE_BATCH(ctx);    
663 }
664
665 static void
666 ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
667 {
668     int x, x_steps, y, y_steps;
669
670     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
671     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
672
673     for (y = 0; y < y_steps; y++) {
674         for (x = 0; x < x_steps; x++) {
675             if (!pp_context->pp_set_block_parameter(&pp_context->private_context, x, y)) {
676                 BEGIN_BATCH(ctx, 20);
677                 OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 18);
678                 OUT_BATCH(ctx, 0);
679                 OUT_BATCH(ctx, 0); /* no indirect data */
680                 OUT_BATCH(ctx, 0);
681
682                 /* inline data grf 5-6 */
683                 assert(sizeof(pp_inline_parameter) == 64);
684                 intel_batchbuffer_data(ctx, &pp_inline_parameter, sizeof(pp_inline_parameter));
685
686                 ADVANCE_BATCH(ctx);
687             }
688         }
689     }
690 }
691
692 static void
693 ironlake_pp_pipeline_setup(VADriverContextP ctx)
694 {
695     struct i965_driver_data *i965 = i965_driver_data(ctx);
696     struct i965_post_processing_context *pp_context = i965->pp_context;
697
698     intel_batchbuffer_start_atomic(ctx, 0x1000);
699     intel_batchbuffer_emit_mi_flush(ctx);
700     ironlake_pp_pipeline_select(ctx);
701     ironlake_pp_state_base_address(ctx);
702     ironlake_pp_state_pointers(ctx, pp_context);
703     ironlake_pp_urb_layout(ctx, pp_context);
704     ironlake_pp_cs_urb_layout(ctx, pp_context);
705     ironlake_pp_constant_buffer(ctx, pp_context);
706     ironlake_pp_object_walker(ctx, pp_context);
707     intel_batchbuffer_end_atomic(ctx);
708 }
709
/* NULL module: a single step in x.  The private context is not used. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
715
/* NULL module: a single step in y.  The private context is not used. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
721
/*
 * NULL module: there are no per-block parameters to set.  Always
 * returns 0, which tells the walker to emit the block.
 */
static int
pp_null_set_block_parameter(void *private_context, int x, int y)
{
    (void)private_context;
    (void)x;
    (void)y;

    return 0;
}
727
728 static void
729 pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
730                    unsigned short srcw, unsigned short srch,
731                    unsigned short destw, unsigned short desth)
732 {
733     struct i965_driver_data *i965 = i965_driver_data(ctx);
734     struct i965_post_processing_context *pp_context = i965->pp_context;
735     struct object_surface *obj_surface;
736
737     /* surface */
738     obj_surface = SURFACE(surface);
739     dri_bo_unreference(obj_surface->pp_out_bo);
740     obj_surface->pp_out_bo = obj_surface->bo;
741     dri_bo_reference(obj_surface->pp_out_bo);
742     assert(obj_surface->pp_out_bo);
743     obj_surface->pp_out_width = obj_surface->width;
744     obj_surface->pp_out_height = obj_surface->height;
745     obj_surface->orig_pp_out_width = obj_surface->orig_width;
746     obj_surface->orig_pp_out_height = obj_surface->orig_height;
747
748     /* private function & data */
749     pp_context->pp_x_steps = pp_null_x_steps;
750     pp_context->pp_y_steps = pp_null_y_steps;
751     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
752 }
753
/* Load & Save: a single step in x.  The private context is not used. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
759
760 static int
761 pp_load_save_y_steps(void *private_context)
762 {
763     struct pp_load_save_context *pp_load_save_context = private_context;
764
765     return pp_load_save_context->dest_h / 8;
766 }
767
768 static int
769 pp_load_save_set_block_parameter(void *private_context, int x, int y)
770 {
771     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
772     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
773     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
774     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
775
776     return 0;
777 }
778
779 static void
780 pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
781                              unsigned short srcw, unsigned short srch,
782                              unsigned short destw, unsigned short desth)
783 {
784     struct i965_driver_data *i965 = i965_driver_data(ctx);
785     struct i965_post_processing_context *pp_context = i965->pp_context;
786     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
787     struct object_surface *obj_surface;
788     struct i965_surface_state *ss;
789     dri_bo *bo;
790     int index, w, h;
791     int orig_w, orig_h;
792     unsigned int tiling, swizzle;
793
794     /* surface */
795     obj_surface = SURFACE(surface);
796     orig_w = obj_surface->orig_width;
797     orig_h = obj_surface->orig_height;
798     w = obj_surface->width;
799     h = obj_surface->height;
800
801     dri_bo_unreference(obj_surface->pp_out_bo);
802     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
803                                           "intermediate surface",
804                                           SIZE_YUV420(w, h),
805                                           4096);
806     assert(obj_surface->pp_out_bo);
807     obj_surface->pp_out_width = obj_surface->width;
808     obj_surface->pp_out_height = obj_surface->height;
809     obj_surface->orig_pp_out_width = obj_surface->orig_width;
810     obj_surface->orig_pp_out_height = obj_surface->orig_height;
811
812     /* source Y surface index 1 */
813     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
814
815     index = 1;
816     pp_context->surfaces[index].s_bo = obj_surface->bo;
817     dri_bo_reference(pp_context->surfaces[index].s_bo);
818     bo = dri_bo_alloc(i965->intel.bufmgr, 
819                       "surface state", 
820                       sizeof(struct i965_surface_state), 
821                       4096);
822     assert(bo);
823     pp_context->surfaces[index].ss_bo = bo;
824     dri_bo_map(bo, True);
825     assert(bo->virtual);
826     ss = bo->virtual;
827     memset(ss, 0, sizeof(*ss));
828     ss->ss0.surface_type = I965_SURFACE_2D;
829     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
830     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
831     ss->ss2.width = orig_w / 4 - 1;
832     ss->ss2.height = orig_h - 1;
833     ss->ss3.pitch = w - 1;
834     pp_set_surface_tiling(ss, tiling);
835     dri_bo_emit_reloc(bo,
836                       I915_GEM_DOMAIN_RENDER, 
837                       0,
838                       0,
839                       offsetof(struct i965_surface_state, ss1),
840                       pp_context->surfaces[index].s_bo);
841     dri_bo_unmap(bo);
842
843     /* source UV surface index 2 */
844     index = 2;
845     pp_context->surfaces[index].s_bo = obj_surface->bo;
846     dri_bo_reference(pp_context->surfaces[index].s_bo);
847     bo = dri_bo_alloc(i965->intel.bufmgr, 
848                       "surface state", 
849                       sizeof(struct i965_surface_state), 
850                       4096);
851     assert(bo);
852     pp_context->surfaces[index].ss_bo = bo;
853     dri_bo_map(bo, True);
854     assert(bo->virtual);
855     ss = bo->virtual;
856     memset(ss, 0, sizeof(*ss));
857     ss->ss0.surface_type = I965_SURFACE_2D;
858     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
859     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
860     ss->ss2.width = orig_w / 4 - 1;
861     ss->ss2.height = orig_h / 2 - 1;
862     ss->ss3.pitch = w - 1;
863     pp_set_surface_tiling(ss, tiling);
864     dri_bo_emit_reloc(bo,
865                       I915_GEM_DOMAIN_RENDER, 
866                       0,
867                       w * h,
868                       offsetof(struct i965_surface_state, ss1),
869                       pp_context->surfaces[index].s_bo);
870     dri_bo_unmap(bo);
871
872     /* destination Y surface index 7 */
873     index = 7;
874     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
875     dri_bo_reference(pp_context->surfaces[index].s_bo);
876     bo = dri_bo_alloc(i965->intel.bufmgr, 
877                       "surface state", 
878                       sizeof(struct i965_surface_state), 
879                       4096);
880     assert(bo);
881     pp_context->surfaces[index].ss_bo = bo;
882     dri_bo_map(bo, True);
883     assert(bo->virtual);
884     ss = bo->virtual;
885     memset(ss, 0, sizeof(*ss));
886     ss->ss0.surface_type = I965_SURFACE_2D;
887     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
888     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
889     ss->ss2.width = orig_w / 4 - 1;
890     ss->ss2.height = orig_h - 1;
891     ss->ss3.pitch = w - 1;
892     dri_bo_emit_reloc(bo,
893                       I915_GEM_DOMAIN_RENDER, 
894                       I915_GEM_DOMAIN_RENDER,
895                       0,
896                       offsetof(struct i965_surface_state, ss1),
897                       pp_context->surfaces[index].s_bo);
898     dri_bo_unmap(bo);
899
900     /* destination UV surface index 8 */
901     index = 8;
902     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
903     dri_bo_reference(pp_context->surfaces[index].s_bo);
904     bo = dri_bo_alloc(i965->intel.bufmgr, 
905                       "surface state", 
906                       sizeof(struct i965_surface_state), 
907                       4096);
908     assert(bo);
909     pp_context->surfaces[index].ss_bo = bo;
910     dri_bo_map(bo, True);
911     assert(bo->virtual);
912     ss = bo->virtual;
913     memset(ss, 0, sizeof(*ss));
914     ss->ss0.surface_type = I965_SURFACE_2D;
915     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
916     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
917     ss->ss2.width = orig_w / 4 - 1;
918     ss->ss2.height = orig_h / 2 - 1;
919     ss->ss3.pitch = w - 1;
920     dri_bo_emit_reloc(bo,
921                       I915_GEM_DOMAIN_RENDER, 
922                       I915_GEM_DOMAIN_RENDER,
923                       w * h,
924                       offsetof(struct i965_surface_state, ss1),
925                       pp_context->surfaces[index].s_bo);
926     dri_bo_unmap(bo);
927
928     /* private function & data */
929     pp_context->pp_x_steps = pp_load_save_x_steps;
930     pp_context->pp_y_steps = pp_load_save_y_steps;
931     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
932     pp_load_save_context->dest_h = h;
933     pp_load_save_context->dest_w = w;
934
935     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
936     pp_inline_parameter.grf5.number_blocks = w / 16;
937 }
938
/*
 * Horizontal step count for the scaling kernel.
 *
 * The private context is not consulted; the answer is always one.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;      /* intentionally unused */

    return 1;
}
944
945 static int
946 pp_scaling_y_steps(void *private_context)
947 {
948     struct pp_scaling_context *pp_scaling_context = private_context;
949
950     return pp_scaling_context->dest_h / 8;
951 }
952
953 static int
954 pp_scaling_set_block_parameter(void *private_context, int x, int y)
955 {
956     float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
957     float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
958
959     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16;
960     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
961     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
962     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
963     
964     return 0;
965 }
966
967 static void
968 pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
969                            unsigned short srcw, unsigned short srch,
970                            unsigned short destw, unsigned short desth)
971 {
972     struct i965_driver_data *i965 = i965_driver_data(ctx);
973     struct i965_post_processing_context *pp_context = i965->pp_context;
974     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
975     struct object_surface *obj_surface;
976     struct i965_sampler_state *sampler_state;
977     struct i965_surface_state *ss;
978     dri_bo *bo;
979     int index;
980     int w, h;
981     int orig_w, orig_h;
982     int pp_out_w, pp_out_h;
983     int orig_pp_out_w, orig_pp_out_h;
984     unsigned int tiling, swizzle;
985
986     /* surface */
987     obj_surface = SURFACE(surface);
988     orig_w = obj_surface->orig_width;
989     orig_h = obj_surface->orig_height;
990     w = obj_surface->width;
991     h = obj_surface->height;
992
993     orig_pp_out_w = destw;
994     orig_pp_out_h = desth;
995     pp_out_w = ALIGN(orig_pp_out_w, 16);
996     pp_out_h = ALIGN(orig_pp_out_h, 16);
997     dri_bo_unreference(obj_surface->pp_out_bo);
998     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
999                                           "intermediate surface",
1000                                           SIZE_YUV420(pp_out_w, pp_out_h),
1001                                           4096);
1002     assert(obj_surface->pp_out_bo);
1003     obj_surface->orig_pp_out_width = orig_pp_out_w;
1004     obj_surface->orig_pp_out_height = orig_pp_out_h;
1005     obj_surface->pp_out_width = pp_out_w;
1006     obj_surface->pp_out_height = pp_out_h;
1007
1008     /* source Y surface index 1 */
1009     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1010
1011     index = 1;
1012     pp_context->surfaces[index].s_bo = obj_surface->bo;
1013     dri_bo_reference(pp_context->surfaces[index].s_bo);
1014     bo = dri_bo_alloc(i965->intel.bufmgr, 
1015                       "surface state", 
1016                       sizeof(struct i965_surface_state), 
1017                       4096);
1018     assert(bo);
1019     pp_context->surfaces[index].ss_bo = bo;
1020     dri_bo_map(bo, True);
1021     assert(bo->virtual);
1022     ss = bo->virtual;
1023     memset(ss, 0, sizeof(*ss));
1024     ss->ss0.surface_type = I965_SURFACE_2D;
1025     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1026     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1027     ss->ss2.width = orig_w - 1;
1028     ss->ss2.height = orig_h - 1;
1029     ss->ss3.pitch = w - 1;
1030     pp_set_surface_tiling(ss, tiling);
1031     dri_bo_emit_reloc(bo,
1032                       I915_GEM_DOMAIN_RENDER, 
1033                       0,
1034                       0,
1035                       offsetof(struct i965_surface_state, ss1),
1036                       pp_context->surfaces[index].s_bo);
1037     dri_bo_unmap(bo);
1038
1039     /* source UV surface index 2 */
1040     index = 2;
1041     pp_context->surfaces[index].s_bo = obj_surface->bo;
1042     dri_bo_reference(pp_context->surfaces[index].s_bo);
1043     bo = dri_bo_alloc(i965->intel.bufmgr, 
1044                       "surface state", 
1045                       sizeof(struct i965_surface_state), 
1046                       4096);
1047     assert(bo);
1048     pp_context->surfaces[index].ss_bo = bo;
1049     dri_bo_map(bo, True);
1050     assert(bo->virtual);
1051     ss = bo->virtual;
1052     memset(ss, 0, sizeof(*ss));
1053     ss->ss0.surface_type = I965_SURFACE_2D;
1054     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1055     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1056     ss->ss2.width = orig_w / 2 - 1;
1057     ss->ss2.height = orig_h / 2 - 1;
1058     ss->ss3.pitch = w - 1;
1059     pp_set_surface_tiling(ss, tiling);
1060     dri_bo_emit_reloc(bo,
1061                       I915_GEM_DOMAIN_RENDER, 
1062                       0,
1063                       w * h,
1064                       offsetof(struct i965_surface_state, ss1),
1065                       pp_context->surfaces[index].s_bo);
1066     dri_bo_unmap(bo);
1067
1068     /* destination Y surface index 7 */
1069     index = 7;
1070     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1071     dri_bo_reference(pp_context->surfaces[index].s_bo);
1072     bo = dri_bo_alloc(i965->intel.bufmgr, 
1073                       "surface state", 
1074                       sizeof(struct i965_surface_state), 
1075                       4096);
1076     assert(bo);
1077     pp_context->surfaces[index].ss_bo = bo;
1078     dri_bo_map(bo, True);
1079     assert(bo->virtual);
1080     ss = bo->virtual;
1081     memset(ss, 0, sizeof(*ss));
1082     ss->ss0.surface_type = I965_SURFACE_2D;
1083     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1084     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1085     ss->ss2.width = pp_out_w / 4 - 1;
1086     ss->ss2.height = pp_out_h - 1;
1087     ss->ss3.pitch = pp_out_w - 1;
1088     dri_bo_emit_reloc(bo,
1089                       I915_GEM_DOMAIN_RENDER, 
1090                       I915_GEM_DOMAIN_RENDER,
1091                       0,
1092                       offsetof(struct i965_surface_state, ss1),
1093                       pp_context->surfaces[index].s_bo);
1094     dri_bo_unmap(bo);
1095
1096     /* destination UV surface index 8 */
1097     index = 8;
1098     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1099     dri_bo_reference(pp_context->surfaces[index].s_bo);
1100     bo = dri_bo_alloc(i965->intel.bufmgr, 
1101                       "surface state", 
1102                       sizeof(struct i965_surface_state), 
1103                       4096);
1104     assert(bo);
1105     pp_context->surfaces[index].ss_bo = bo;
1106     dri_bo_map(bo, True);
1107     assert(bo->virtual);
1108     ss = bo->virtual;
1109     memset(ss, 0, sizeof(*ss));
1110     ss->ss0.surface_type = I965_SURFACE_2D;
1111     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1112     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
1113     ss->ss2.width = pp_out_w / 4 - 1;
1114     ss->ss2.height = pp_out_h / 2 - 1;
1115     ss->ss3.pitch = pp_out_w - 1;
1116     dri_bo_emit_reloc(bo,
1117                       I915_GEM_DOMAIN_RENDER, 
1118                       I915_GEM_DOMAIN_RENDER,
1119                       pp_out_w * pp_out_h,
1120                       offsetof(struct i965_surface_state, ss1),
1121                       pp_context->surfaces[index].s_bo);
1122     dri_bo_unmap(bo);
1123
1124     /* sampler state */
1125     dri_bo_map(pp_context->sampler_state_table.bo, True);
1126     assert(pp_context->sampler_state_table.bo->virtual);
1127     sampler_state = pp_context->sampler_state_table.bo->virtual;
1128
1129     /* SIMD16 Y index 1 */
1130     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1131     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1132     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1133     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1134     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1135
1136     /* SIMD16 UV index 2 */
1137     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1138     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1139     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1140     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1141     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1142
1143     dri_bo_unmap(pp_context->sampler_state_table.bo);
1144
1145     /* private function & data */
1146     pp_context->pp_x_steps = pp_scaling_x_steps;
1147     pp_context->pp_y_steps = pp_scaling_y_steps;
1148     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1149
1150     pp_scaling_context->dest_w = pp_out_w;
1151     pp_scaling_context->dest_h = pp_out_h;
1152
1153     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
1154     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
1155     pp_inline_parameter.grf5.block_count_x = pp_out_w / 16;   /* 1 x N */
1156     pp_inline_parameter.grf5.number_blocks = pp_out_w / 16;
1157     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1158     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1159 }
1160
1161 static int
1162 pp_avs_x_steps(void *private_context)
1163 {
1164     struct pp_avs_context *pp_avs_context = private_context;
1165
1166     return pp_avs_context->dest_w / 16;
1167 }
1168
/*
 * Vertical step count for the AVS kernel.
 *
 * The private context is not consulted; the answer is always one.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;      /* intentionally unused */

    return 1;
}
1174
1175 static int
1176 pp_avs_set_block_parameter(void *private_context, int x, int y)
1177 {
1178     struct pp_avs_context *pp_avs_context = private_context;
1179     float src_x_steping, src_y_steping, video_step_delta;
1180     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1181
1182     if (tmp_w >= pp_avs_context->dest_w) {
1183         pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1184         pp_inline_parameter.grf6.video_step_delta = 0;
1185         
1186         if (x == 0) {
1187             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2;
1188         } else {
1189             src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1190             video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1191             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1192                 16 * 15 * video_step_delta / 2;
1193         }
1194     } else {
1195         int n0, n1, n2, nls_left, nls_right;
1196         int factor_a = 5, factor_b = 4;
1197         float f;
1198
1199         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1200         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1201         n2 = tmp_w / (16 * factor_a);
1202         nls_left = n0 + n2;
1203         nls_right = n1 + n2;
1204         f = (float) n2 * 16 / tmp_w;
1205         
1206         if (n0 < 5) {
1207             pp_inline_parameter.grf6.video_step_delta = 0.0;
1208
1209             if (x == 0) {
1210                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1211                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1212             } else {
1213                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1214                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1215                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1216                     16 * 15 * video_step_delta / 2;
1217             }
1218         } else {
1219             if (x < nls_left) {
1220                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1221                 float a = f / (nls_left * 16 * factor_b);
1222                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1223                 
1224                 pp_inline_parameter.grf6.video_step_delta = b;
1225
1226                 if (x == 0) {
1227                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1228                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
1229                 } else {
1230                     src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1231                     video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1232                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1233                         16 * 15 * video_step_delta / 2;
1234                     pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
1235                 }
1236             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1237                 /* scale the center linearly */
1238                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1239                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1240                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1241                     16 * 15 * video_step_delta / 2;
1242                 pp_inline_parameter.grf6.video_step_delta = 0.0;
1243                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1244             } else {
1245                 float a = f / (nls_right * 16 * factor_b);
1246                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1247
1248                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1249                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1250                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1251                     16 * 15 * video_step_delta / 2;
1252                 pp_inline_parameter.grf6.video_step_delta = -b;
1253
1254                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1255                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1256                 else
1257                     pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
1258             }
1259         }
1260     }
1261
1262     src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
1263     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
1264     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1265     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
1266
1267     return 0;
1268 }
1269
1270 static void
1271 pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1272                        unsigned short srcw, unsigned short srch,
1273                        unsigned short destw, unsigned short desth)
1274 {
1275     struct i965_driver_data *i965 = i965_driver_data(ctx);
1276     struct i965_post_processing_context *pp_context = i965->pp_context;
1277     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1278     struct object_surface *obj_surface;
1279     struct i965_surface_state *ss;
1280     struct i965_sampler_8x8 *sampler_8x8;
1281     struct i965_sampler_8x8_state *sampler_8x8_state;
1282     struct i965_surface_state2 *ss_8x8;
1283     dri_bo *bo, *src_bo;
1284     int index;
1285     int w, h;
1286     int orig_w, orig_h;
1287     int pp_out_w, pp_out_h;
1288     int orig_pp_out_w, orig_pp_out_h;
1289     unsigned int tiling, swizzle;
1290
1291     /* surface */
1292     obj_surface = SURFACE(surface);
1293     
1294     if (input == 1) {
1295         orig_w = obj_surface->orig_pp_out_width;
1296         orig_h = obj_surface->orig_pp_out_height;
1297         w = obj_surface->pp_out_width;
1298         h = obj_surface->pp_out_height;
1299         src_bo = obj_surface->pp_out_bo;
1300     } else {
1301         orig_w = obj_surface->orig_width;
1302         orig_h = obj_surface->orig_height;
1303         w = obj_surface->width;
1304         h = obj_surface->height;
1305         src_bo = obj_surface->bo;
1306     }
1307
1308     assert(src_bo);
1309     dri_bo_get_tiling(src_bo, &tiling, &swizzle);
1310
1311     /* source Y surface index 1 */
1312     index = 1;
1313     pp_context->surfaces[index].s_bo = src_bo;
1314     dri_bo_reference(pp_context->surfaces[index].s_bo);
1315     bo = dri_bo_alloc(i965->intel.bufmgr, 
1316                       "Y surface state for sample_8x8", 
1317                       sizeof(struct i965_surface_state2), 
1318                       4096);
1319     assert(bo);
1320     pp_context->surfaces[index].ss_bo = bo;
1321     dri_bo_map(bo, True);
1322     assert(bo->virtual);
1323     ss_8x8 = bo->virtual;
1324     memset(ss_8x8, 0, sizeof(*ss_8x8));
1325     ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1326     ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1327     ss_8x8->ss1.width = orig_w - 1;
1328     ss_8x8->ss1.height = orig_h - 1;
1329     ss_8x8->ss2.half_pitch_for_chroma = 0;
1330     ss_8x8->ss2.pitch = w - 1;
1331     ss_8x8->ss2.interleave_chroma = 0;
1332     ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM;
1333     ss_8x8->ss3.x_offset_for_cb = 0;
1334     ss_8x8->ss3.y_offset_for_cb = 0;
1335     pp_set_surface2_tiling(ss_8x8, tiling);
1336     dri_bo_emit_reloc(bo,
1337                       I915_GEM_DOMAIN_RENDER, 
1338                       0,
1339                       0,
1340                       offsetof(struct i965_surface_state2, ss0),
1341                       pp_context->surfaces[index].s_bo);
1342     dri_bo_unmap(bo);
1343
1344     /* source UV surface index 2 */
1345     index = 2;
1346     pp_context->surfaces[index].s_bo = src_bo;
1347     dri_bo_reference(pp_context->surfaces[index].s_bo);
1348     bo = dri_bo_alloc(i965->intel.bufmgr, 
1349                       "UV surface state for sample_8x8", 
1350                       sizeof(struct i965_surface_state2), 
1351                       4096);
1352     assert(bo);
1353     pp_context->surfaces[index].ss_bo = bo;
1354     dri_bo_map(bo, True);
1355     assert(bo->virtual);
1356     ss_8x8 = bo->virtual;
1357     memset(ss_8x8, 0, sizeof(*ss_8x8));
1358     ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + w * h;
1359     ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1360     ss_8x8->ss1.width = orig_w - 1;
1361     ss_8x8->ss1.height = orig_h - 1;
1362     ss_8x8->ss2.half_pitch_for_chroma = 0;
1363     ss_8x8->ss2.pitch = w - 1;
1364     ss_8x8->ss2.interleave_chroma = 1;
1365     ss_8x8->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1366     ss_8x8->ss3.x_offset_for_cb = 0;
1367     ss_8x8->ss3.y_offset_for_cb = 0;
1368     pp_set_surface2_tiling(ss_8x8, tiling);
1369     dri_bo_emit_reloc(bo,
1370                       I915_GEM_DOMAIN_RENDER, 
1371                       0,
1372                       w * h,
1373                       offsetof(struct i965_surface_state2, ss0),
1374                       pp_context->surfaces[index].s_bo);
1375     dri_bo_unmap(bo);
1376
1377     orig_pp_out_w = destw;
1378     orig_pp_out_h = desth;
1379     pp_out_w = ALIGN(orig_pp_out_w, 16);
1380     pp_out_h = ALIGN(orig_pp_out_h, 16);
1381     dri_bo_unreference(obj_surface->pp_out_bo);
1382     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1383                                           "intermediate surface",
1384                                           SIZE_YUV420(pp_out_w, pp_out_h),
1385                                           4096);
1386     assert(obj_surface->pp_out_bo);
1387     obj_surface->orig_pp_out_width = orig_pp_out_w;
1388     obj_surface->orig_pp_out_height = orig_pp_out_h;
1389     obj_surface->pp_out_width = pp_out_w;
1390     obj_surface->pp_out_height = pp_out_h;
1391
1392     /* destination Y surface index 7 */
1393     index = 7;
1394     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1395     dri_bo_reference(pp_context->surfaces[index].s_bo);
1396     bo = dri_bo_alloc(i965->intel.bufmgr, 
1397                       "surface state", 
1398                       sizeof(struct i965_surface_state), 
1399                       4096);
1400     assert(bo);
1401     pp_context->surfaces[index].ss_bo = bo;
1402     dri_bo_map(bo, True);
1403     assert(bo->virtual);
1404     ss = bo->virtual;
1405     memset(ss, 0, sizeof(*ss));
1406     ss->ss0.surface_type = I965_SURFACE_2D;
1407     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1408     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1409     ss->ss2.width = pp_out_w / 4 - 1;
1410     ss->ss2.height = pp_out_h - 1;
1411     ss->ss3.pitch = pp_out_w - 1;
1412     dri_bo_emit_reloc(bo,
1413                       I915_GEM_DOMAIN_RENDER, 
1414                       I915_GEM_DOMAIN_RENDER,
1415                       0,
1416                       offsetof(struct i965_surface_state, ss1),
1417                       pp_context->surfaces[index].s_bo);
1418     dri_bo_unmap(bo);
1419
1420     /* destination UV surface index 8 */
1421     index = 8;
1422     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1423     dri_bo_reference(pp_context->surfaces[index].s_bo);
1424     bo = dri_bo_alloc(i965->intel.bufmgr, 
1425                       "surface state", 
1426                       sizeof(struct i965_surface_state), 
1427                       4096);
1428     assert(bo);
1429     pp_context->surfaces[index].ss_bo = bo;
1430     dri_bo_map(bo, True);
1431     assert(bo->virtual);
1432     ss = bo->virtual;
1433     memset(ss, 0, sizeof(*ss));
1434     ss->ss0.surface_type = I965_SURFACE_2D;
1435     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1436     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
1437     ss->ss2.width = pp_out_w / 4 - 1;
1438     ss->ss2.height = pp_out_h / 2 - 1;
1439     ss->ss3.pitch = pp_out_w - 1;
1440     dri_bo_emit_reloc(bo,
1441                       I915_GEM_DOMAIN_RENDER, 
1442                       I915_GEM_DOMAIN_RENDER,
1443                       pp_out_w * pp_out_h,
1444                       offsetof(struct i965_surface_state, ss1),
1445                       pp_context->surfaces[index].s_bo);
1446     dri_bo_unmap(bo);
1447     
1448     /* sampler 8x8 state */
1449     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1450     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1451     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1452     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1453     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1454     sampler_8x8_state->dw136.default_sharpness_level = 0;
1455     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1456     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1457     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1458     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1459
1460     /* sampler 8x8 */
1461     dri_bo_map(pp_context->sampler_state_table.bo, True);
1462     assert(pp_context->sampler_state_table.bo->virtual);
1463     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1464     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1465
1466     /* sample_8x8 Y index 1 */
1467     index = 1;
1468     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1469     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1470     sampler_8x8[index].dw0.ief_bypass = 0;
1471     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1472     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1473     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1474     sampler_8x8[index].dw2.global_noise_estimation = 22;
1475     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1476     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1477     sampler_8x8[index].dw3.strong_edge_weight = 7;
1478     sampler_8x8[index].dw3.regular_weight = 2;
1479     sampler_8x8[index].dw3.non_edge_weight = 0;
1480     sampler_8x8[index].dw3.gain_factor = 40;
1481     sampler_8x8[index].dw4.steepness_boost = 0;
1482     sampler_8x8[index].dw4.steepness_threshold = 0;
1483     sampler_8x8[index].dw4.mr_boost = 0;
1484     sampler_8x8[index].dw4.mr_threshold = 5;
1485     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1486     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1487     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1488     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1489     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1490     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1491     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1492     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1493     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1494     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1495     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1496     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1497     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1498     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1499     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1500     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1501     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1502     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1503     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1504     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1505     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1506     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1507     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1508     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1509     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1510     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1511     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1512     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1513     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1514     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1515     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1516     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1517     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1518     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1519     sampler_8x8[index].dw13.limiter_boost = 0;
1520     sampler_8x8[index].dw13.minimum_limiter = 10;
1521     sampler_8x8[index].dw13.maximum_limiter = 11;
1522     sampler_8x8[index].dw14.clip_limiter = 130;
1523     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1524                       I915_GEM_DOMAIN_RENDER, 
1525                       0,
1526                       0,
1527                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1528                       pp_context->sampler_state_table.bo_8x8);
1529
1530     dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
1531     assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
1532     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1533     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
1534     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1535     sampler_8x8_state->dw136.default_sharpness_level = 0;
1536     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
1537     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1538     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1539     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
1540
1541     /* sample_8x8 UV index 2 */
1542     index = 2;
1543     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1544     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
1545     sampler_8x8[index].dw0.ief_bypass = 0;
1546     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1547     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1548     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
1549     sampler_8x8[index].dw2.global_noise_estimation = 22;
1550     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1551     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1552     sampler_8x8[index].dw3.strong_edge_weight = 7;
1553     sampler_8x8[index].dw3.regular_weight = 2;
1554     sampler_8x8[index].dw3.non_edge_weight = 0;
1555     sampler_8x8[index].dw3.gain_factor = 40;
1556     sampler_8x8[index].dw4.steepness_boost = 0;
1557     sampler_8x8[index].dw4.steepness_threshold = 0;
1558     sampler_8x8[index].dw4.mr_boost = 0;
1559     sampler_8x8[index].dw4.mr_threshold = 5;
1560     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1561     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1562     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1563     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1564     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1565     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1566     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1567     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1568     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1569     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1570     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1571     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1572     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1573     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1574     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1575     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1576     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1577     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1578     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1579     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1580     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1581     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1582     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1583     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1584     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1585     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1586     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1587     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1588     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1589     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1590     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1591     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1592     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1593     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1594     sampler_8x8[index].dw13.limiter_boost = 0;
1595     sampler_8x8[index].dw13.minimum_limiter = 10;
1596     sampler_8x8[index].dw13.maximum_limiter = 11;
1597     sampler_8x8[index].dw14.clip_limiter = 130;
1598     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1599                       I915_GEM_DOMAIN_RENDER, 
1600                       0,
1601                       0,
1602                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1603                       pp_context->sampler_state_table.bo_8x8_uv);
1604
1605     dri_bo_unmap(pp_context->sampler_state_table.bo);
1606
1607     /* private function & data */
1608     pp_context->pp_x_steps = pp_avs_x_steps;
1609     pp_context->pp_y_steps = pp_avs_y_steps;
1610     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1611
1612     pp_avs_context->dest_w = pp_out_w;
1613     pp_avs_context->dest_h = pp_out_h;
1614     pp_avs_context->src_w = w;
1615     pp_avs_context->src_h = h;
1616
1617     pp_static_parameter.grf4.r4_2.avs.nlas = 1;
1618     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
1619     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
1620     pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
1621     pp_inline_parameter.grf5.number_blocks = pp_out_h / 8;
1622     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1623     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1624     pp_inline_parameter.grf6.video_step_delta = 0.0;
1625 }
1626
/*
 * Horizontal step count used by the object walker for the DNDI module.
 *
 * The result is the constant 1; private_context is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void) private_context;     /* intentionally unused */

    return 1;
}
1632
1633 static int
1634 pp_dndi_y_steps(void *private_context)
1635 {
1636     struct pp_dndi_context *pp_dndi_context = private_context;
1637
1638     return pp_dndi_context->dest_h / 4;
1639 }
1640
1641 static int
1642 pp_dndi_set_block_parameter(void *private_context, int x, int y)
1643 {
1644     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1645     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1646
1647     return 0;
1648 }
1649
1650 static 
1651 void pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1652                              unsigned short srcw, unsigned short srch,
1653                              unsigned short destw, unsigned short desth)
1654 {
1655     struct i965_driver_data *i965 = i965_driver_data(ctx);
1656     struct i965_post_processing_context *pp_context = i965->pp_context;
1657     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1658     struct object_surface *obj_surface;
1659     struct i965_surface_state *ss;
1660     struct i965_surface_state2 *ss_dndi;
1661     struct i965_sampler_dndi *sampler_dndi;
1662     dri_bo *bo;
1663     int index;
1664     int w, h;
1665     int orig_w, orig_h;
1666     unsigned int tiling, swizzle;
1667
1668     /* surface */
1669     obj_surface = SURFACE(surface);
1670     orig_w = obj_surface->orig_width;
1671     orig_h = obj_surface->orig_height;
1672     w = obj_surface->width;
1673     h = obj_surface->height;
1674
1675     if (pp_context->stmm.bo == NULL) {
1676         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1677                                            "STMM surface",
1678                                            w * h,
1679                                            4096);
1680         assert(pp_context->stmm.bo);
1681     }
1682
1683     dri_bo_unreference(obj_surface->pp_out_bo);
1684     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1685                                           "intermediate surface",
1686                                           SIZE_YUV420(w, h),
1687                                           4096);
1688     assert(obj_surface->pp_out_bo);
1689     obj_surface->orig_pp_out_width = orig_w;
1690     obj_surface->orig_pp_out_height = orig_h;
1691     obj_surface->pp_out_width = w;
1692     obj_surface->pp_out_height = h;
1693
1694     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1695     /* source UV surface index 2 */
1696     index = 2;
1697     pp_context->surfaces[index].s_bo = obj_surface->bo;
1698     dri_bo_reference(pp_context->surfaces[index].s_bo);
1699     bo = dri_bo_alloc(i965->intel.bufmgr, 
1700                       "surface state", 
1701                       sizeof(struct i965_surface_state), 
1702                       4096);
1703     assert(bo);
1704     pp_context->surfaces[index].ss_bo = bo;
1705     dri_bo_map(bo, True);
1706     assert(bo->virtual);
1707     ss = bo->virtual;
1708     memset(ss, 0, sizeof(*ss));
1709     ss->ss0.surface_type = I965_SURFACE_2D;
1710     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1711     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1712     ss->ss2.width = orig_w / 4 - 1;
1713     ss->ss2.height = orig_h / 2 - 1;
1714     ss->ss3.pitch = w - 1;
1715     pp_set_surface_tiling(ss, tiling);
1716     dri_bo_emit_reloc(bo,
1717                       I915_GEM_DOMAIN_RENDER, 
1718                       0,
1719                       w * h,
1720                       offsetof(struct i965_surface_state, ss1),
1721                       pp_context->surfaces[index].s_bo);
1722     dri_bo_unmap(bo);
1723
1724     /* source YUV surface index 4 */
1725     index = 4;
1726     pp_context->surfaces[index].s_bo = obj_surface->bo;
1727     dri_bo_reference(pp_context->surfaces[index].s_bo);
1728     bo = dri_bo_alloc(i965->intel.bufmgr, 
1729                       "YUV surface state for deinterlace ", 
1730                       sizeof(struct i965_surface_state2), 
1731                       4096);
1732     assert(bo);
1733     pp_context->surfaces[index].ss_bo = bo;
1734     dri_bo_map(bo, True);
1735     assert(bo->virtual);
1736     ss_dndi = bo->virtual;
1737     memset(ss_dndi, 0, sizeof(*ss_dndi));
1738     ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1739     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 0;
1740     ss_dndi->ss1.width = w - 1;
1741     ss_dndi->ss1.height = h - 1;
1742     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1;
1743     ss_dndi->ss2.half_pitch_for_chroma = 0;
1744     ss_dndi->ss2.pitch = w - 1;
1745     ss_dndi->ss2.interleave_chroma = 1;
1746     ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1747     ss_dndi->ss2.half_pitch_for_chroma = 0;
1748     ss_dndi->ss2.tiled_surface = 0;
1749     ss_dndi->ss3.x_offset_for_cb = 0;
1750     ss_dndi->ss3.y_offset_for_cb = h;
1751     pp_set_surface2_tiling(ss_dndi, tiling);
1752     dri_bo_emit_reloc(bo,
1753                       I915_GEM_DOMAIN_RENDER, 
1754                       0,
1755                       0,
1756                       offsetof(struct i965_surface_state2, ss0),
1757                       pp_context->surfaces[index].s_bo);
1758     dri_bo_unmap(bo);
1759
1760     /* source STMM surface index 20 */
1761     index = 20;
1762     pp_context->surfaces[index].s_bo = pp_context->stmm.bo;
1763     dri_bo_reference(pp_context->surfaces[index].s_bo);
1764     bo = dri_bo_alloc(i965->intel.bufmgr, 
1765                       "STMM surface state for deinterlace ", 
1766                       sizeof(struct i965_surface_state2), 
1767                       4096);
1768     assert(bo);
1769     pp_context->surfaces[index].ss_bo = bo;
1770     dri_bo_map(bo, True);
1771     assert(bo->virtual);
1772     ss = bo->virtual;
1773     memset(ss, 0, sizeof(*ss));
1774     ss->ss0.surface_type = I965_SURFACE_2D;
1775     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1776     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1777     ss->ss2.width = w - 1;
1778     ss->ss2.height = h - 1;
1779     ss->ss3.pitch = w - 1;
1780     dri_bo_emit_reloc(bo,
1781                       I915_GEM_DOMAIN_RENDER, 
1782                       I915_GEM_DOMAIN_RENDER,
1783                       0,
1784                       offsetof(struct i965_surface_state, ss1),
1785                       pp_context->surfaces[index].s_bo);
1786     dri_bo_unmap(bo);
1787
1788     /* destination Y surface index 7 */
1789     index = 7;
1790     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1791     dri_bo_reference(pp_context->surfaces[index].s_bo);
1792     bo = dri_bo_alloc(i965->intel.bufmgr, 
1793                       "surface state", 
1794                       sizeof(struct i965_surface_state), 
1795                       4096);
1796     assert(bo);
1797     pp_context->surfaces[index].ss_bo = bo;
1798     dri_bo_map(bo, True);
1799     assert(bo->virtual);
1800     ss = bo->virtual;
1801     memset(ss, 0, sizeof(*ss));
1802     ss->ss0.surface_type = I965_SURFACE_2D;
1803     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1804     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1805     ss->ss2.width = w / 4 - 1;
1806     ss->ss2.height = h - 1;
1807     ss->ss3.pitch = w - 1;
1808     dri_bo_emit_reloc(bo,
1809                       I915_GEM_DOMAIN_RENDER, 
1810                       I915_GEM_DOMAIN_RENDER,
1811                       0,
1812                       offsetof(struct i965_surface_state, ss1),
1813                       pp_context->surfaces[index].s_bo);
1814     dri_bo_unmap(bo);
1815
1816     /* destination UV surface index 8 */
1817     index = 8;
1818     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1819     dri_bo_reference(pp_context->surfaces[index].s_bo);
1820     bo = dri_bo_alloc(i965->intel.bufmgr, 
1821                       "surface state", 
1822                       sizeof(struct i965_surface_state), 
1823                       4096);
1824     assert(bo);
1825     pp_context->surfaces[index].ss_bo = bo;
1826     dri_bo_map(bo, True);
1827     assert(bo->virtual);
1828     ss = bo->virtual;
1829     memset(ss, 0, sizeof(*ss));
1830     ss->ss0.surface_type = I965_SURFACE_2D;
1831     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1832     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1833     ss->ss2.width = w / 4 - 1;
1834     ss->ss2.height = h / 2 - 1;
1835     ss->ss3.pitch = w - 1;
1836     dri_bo_emit_reloc(bo,
1837                       I915_GEM_DOMAIN_RENDER, 
1838                       I915_GEM_DOMAIN_RENDER,
1839                       w * h,
1840                       offsetof(struct i965_surface_state, ss1),
1841                       pp_context->surfaces[index].s_bo);
1842     dri_bo_unmap(bo);
1843
1844     /* sampler dndi */
1845     dri_bo_map(pp_context->sampler_state_table.bo, True);
1846     assert(pp_context->sampler_state_table.bo->virtual);
1847     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1848     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1849
1850     /* sample dndi index 1 */
1851     index = 0;
1852     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1853     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1854     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1855     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1856
1857     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1858     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1859     sampler_dndi[index].dw1.stmm_c2 = 0;
1860     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1861     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1862
1863     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
1864     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1865     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1866     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1867
1868     sampler_dndi[index].dw3.maximum_stmm = 128;
1869     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1870     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1871     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1872     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1873
1874     sampler_dndi[index].dw4.sdi_delta = 8;
1875     sampler_dndi[index].dw4.sdi_threshold = 128;
1876     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1877     sampler_dndi[index].dw4.stmm_shift_up = 0;
1878     sampler_dndi[index].dw4.stmm_shift_down = 0;
1879     sampler_dndi[index].dw4.minimum_stmm = 0;
1880
1881     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1882     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1883     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1884     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1885
1886     sampler_dndi[index].dw6.dn_enable = 1;
1887     sampler_dndi[index].dw6.di_enable = 1;
1888     sampler_dndi[index].dw6.di_partial = 0;
1889     sampler_dndi[index].dw6.dndi_top_first = 1;
1890     sampler_dndi[index].dw6.dndi_stream_id = 1;
1891     sampler_dndi[index].dw6.dndi_first_frame = 1;
1892     sampler_dndi[index].dw6.progressive_dn = 0;
1893     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1894     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1895     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1896
1897     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1898     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1899     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1900     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1901
1902     dri_bo_unmap(pp_context->sampler_state_table.bo);
1903
1904     /* private function & data */
1905     pp_context->pp_x_steps = pp_dndi_x_steps;
1906     pp_context->pp_y_steps = pp_dndi_y_steps;
1907     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1908
1909     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1910     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1911     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1912     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1913
1914     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1915     pp_inline_parameter.grf5.number_blocks = w / 16;
1916     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1917     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1918
1919     pp_dndi_context->dest_w = w;
1920     pp_dndi_context->dest_h = h;
1921 }
1922
1923 static void
1924 ironlake_pp_initialize(VADriverContextP ctx,
1925                        VASurfaceID surface,
1926                        int input,
1927                        short srcx,
1928                        short srcy,
1929                        unsigned short srcw,
1930                        unsigned short srch,
1931                        short destx,
1932                        short desty,
1933                        unsigned short destw,
1934                        unsigned short desth,
1935                        int pp_index)
1936 {
1937     struct i965_driver_data *i965 = i965_driver_data(ctx);
1938     struct i965_post_processing_context *pp_context = i965->pp_context;
1939     struct pp_module *pp_module;
1940     dri_bo *bo;
1941     int i;
1942
1943     dri_bo_unreference(pp_context->curbe.bo);
1944     bo = dri_bo_alloc(i965->intel.bufmgr,
1945                       "constant buffer",
1946                       4096, 
1947                       4096);
1948     assert(bo);
1949     pp_context->curbe.bo = bo;
1950
1951     dri_bo_unreference(pp_context->binding_table.bo);
1952     bo = dri_bo_alloc(i965->intel.bufmgr, 
1953                       "binding table",
1954                       sizeof(unsigned int), 
1955                       4096);
1956     assert(bo);
1957     pp_context->binding_table.bo = bo;
1958
1959     dri_bo_unreference(pp_context->idrt.bo);
1960     bo = dri_bo_alloc(i965->intel.bufmgr, 
1961                       "interface discriptor", 
1962                       sizeof(struct i965_interface_descriptor), 
1963                       4096);
1964     assert(bo);
1965     pp_context->idrt.bo = bo;
1966     pp_context->idrt.num_interface_descriptors = 0;
1967
1968     dri_bo_unreference(pp_context->sampler_state_table.bo);
1969     bo = dri_bo_alloc(i965->intel.bufmgr, 
1970                       "sampler state table", 
1971                       4096,
1972                       4096);
1973     assert(bo);
1974     dri_bo_map(bo, True);
1975     memset(bo->virtual, 0, bo->size);
1976     dri_bo_unmap(bo);
1977     pp_context->sampler_state_table.bo = bo;
1978
1979     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1980     bo = dri_bo_alloc(i965->intel.bufmgr, 
1981                       "sampler 8x8 state ",
1982                       4096,
1983                       4096);
1984     assert(bo);
1985     pp_context->sampler_state_table.bo_8x8 = bo;
1986
1987     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1988     bo = dri_bo_alloc(i965->intel.bufmgr, 
1989                       "sampler 8x8 state ",
1990                       4096,
1991                       4096);
1992     assert(bo);
1993     pp_context->sampler_state_table.bo_8x8_uv = bo;
1994
1995     dri_bo_unreference(pp_context->vfe_state.bo);
1996     bo = dri_bo_alloc(i965->intel.bufmgr, 
1997                       "vfe state", 
1998                       sizeof(struct i965_vfe_state), 
1999                       4096);
2000     assert(bo);
2001     pp_context->vfe_state.bo = bo;
2002     
2003     for (i = 0; i < MAX_PP_SURFACES; i++) {
2004         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2005         pp_context->surfaces[i].ss_bo = NULL;
2006
2007         dri_bo_unreference(pp_context->surfaces[i].s_bo);
2008         pp_context->surfaces[i].s_bo = NULL;
2009     }
2010
2011     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
2012     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
2013     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
2014     assert(pp_modules);
2015     pp_context->current_pp = pp_index;
2016     pp_module = &pp_modules[pp_index];
2017     
2018     if (pp_module->initialize)
2019         pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
2020 }
2021
2022 static void
2023 ironlake_post_processing(VADriverContextP ctx,
2024                          VASurfaceID surface,
2025                          int input,
2026                          short srcx,
2027                          short srcy,
2028                          unsigned short srcw,
2029                          unsigned short srch,
2030                          short destx,
2031                          short desty,
2032                          unsigned short destw,
2033                          unsigned short desth,
2034                          int pp_index)
2035 {
2036     ironlake_pp_initialize(ctx, surface, input,
2037                            srcx, srcy, srcw, srch,
2038                            destx, desty, destw, desth,
2039                            pp_index);
2040     ironlake_pp_states_setup(ctx);
2041     ironlake_pp_pipeline_setup(ctx);
2042 }
2043
2044 static void
2045 gen6_pp_initialize(VADriverContextP ctx,
2046                    VASurfaceID surface,
2047                    int input,
2048                    short srcx,
2049                    short srcy,
2050                    unsigned short srcw,
2051                    unsigned short srch,
2052                    short destx,
2053                    short desty,
2054                    unsigned short destw,
2055                    unsigned short desth,
2056                    int pp_index)
2057 {
2058     struct i965_driver_data *i965 = i965_driver_data(ctx);
2059     struct i965_post_processing_context *pp_context = i965->pp_context;
2060     struct pp_module *pp_module;
2061     dri_bo *bo;
2062     int i;
2063
2064     dri_bo_unreference(pp_context->curbe.bo);
2065     bo = dri_bo_alloc(i965->intel.bufmgr,
2066                       "constant buffer",
2067                       4096, 
2068                       4096);
2069     assert(bo);
2070     pp_context->curbe.bo = bo;
2071
2072     dri_bo_unreference(pp_context->binding_table.bo);
2073     bo = dri_bo_alloc(i965->intel.bufmgr, 
2074                       "binding table",
2075                       sizeof(unsigned int), 
2076                       4096);
2077     assert(bo);
2078     pp_context->binding_table.bo = bo;
2079
2080     dri_bo_unreference(pp_context->idrt.bo);
2081     bo = dri_bo_alloc(i965->intel.bufmgr, 
2082                       "interface discriptor", 
2083                       sizeof(struct gen6_interface_descriptor_data), 
2084                       4096);
2085     assert(bo);
2086     pp_context->idrt.bo = bo;
2087     pp_context->idrt.num_interface_descriptors = 0;
2088
2089     dri_bo_unreference(pp_context->sampler_state_table.bo);
2090     bo = dri_bo_alloc(i965->intel.bufmgr, 
2091                       "sampler state table", 
2092                       4096,
2093                       4096);
2094     assert(bo);
2095     dri_bo_map(bo, True);
2096     memset(bo->virtual, 0, bo->size);
2097     dri_bo_unmap(bo);
2098     pp_context->sampler_state_table.bo = bo;
2099
2100     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2101     bo = dri_bo_alloc(i965->intel.bufmgr, 
2102                       "sampler 8x8 state ",
2103                       4096,
2104                       4096);
2105     assert(bo);
2106     pp_context->sampler_state_table.bo_8x8 = bo;
2107
2108     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2109     bo = dri_bo_alloc(i965->intel.bufmgr, 
2110                       "sampler 8x8 state ",
2111                       4096,
2112                       4096);
2113     assert(bo);
2114     pp_context->sampler_state_table.bo_8x8_uv = bo;
2115
2116     dri_bo_unreference(pp_context->vfe_state.bo);
2117     bo = dri_bo_alloc(i965->intel.bufmgr, 
2118                       "vfe state", 
2119                       sizeof(struct i965_vfe_state), 
2120                       4096);
2121     assert(bo);
2122     pp_context->vfe_state.bo = bo;
2123     
2124     for (i = 0; i < MAX_PP_SURFACES; i++) {
2125         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2126         pp_context->surfaces[i].ss_bo = NULL;
2127
2128         dri_bo_unreference(pp_context->surfaces[i].s_bo);
2129         pp_context->surfaces[i].s_bo = NULL;
2130     }
2131
2132     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
2133     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
2134     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
2135     assert(pp_modules);
2136     pp_context->current_pp = pp_index;
2137     pp_module = &pp_modules[pp_index];
2138     
2139     if (pp_module->initialize)
2140         pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
2141 }
2142
2143 static void
2144 gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
2145 {
2146     unsigned int *binding_table;
2147     dri_bo *bo = pp_context->binding_table.bo;
2148     int i;
2149
2150     dri_bo_map(bo, 1);
2151     assert(bo->virtual);
2152     binding_table = bo->virtual;
2153     memset(binding_table, 0, bo->size);
2154
2155     for (i = 0; i < MAX_PP_SURFACES; i++) {
2156         if (pp_context->surfaces[i].ss_bo) {
2157             assert(pp_context->surfaces[i].s_bo);
2158
2159             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
2160             dri_bo_emit_reloc(bo,
2161                               I915_GEM_DOMAIN_INSTRUCTION, 0,
2162                               0,
2163                               i * sizeof(*binding_table),
2164                               pp_context->surfaces[i].ss_bo);
2165         }
2166     
2167     }
2168
2169     dri_bo_unmap(bo);
2170 }
2171
2172 static void
2173 gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
2174 {
2175     struct gen6_interface_descriptor_data *desc;
2176     dri_bo *bo;
2177     int pp_index = pp_context->current_pp;
2178
2179     bo = pp_context->idrt.bo;
2180     dri_bo_map(bo, True);
2181     assert(bo->virtual);
2182     desc = bo->virtual;
2183     memset(desc, 0, sizeof(*desc));
2184     desc->desc0.kernel_start_pointer = 
2185         pp_modules[pp_index].bo->offset >> 6; /* reloc */
2186     desc->desc1.single_program_flow = 1;
2187     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
2188     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
2189     desc->desc2.sampler_state_pointer = 
2190         pp_context->sampler_state_table.bo->offset >> 5;
2191     desc->desc3.binding_table_entry_count = 0;
2192     desc->desc3.binding_table_pointer = 
2193         pp_context->binding_table.bo->offset >> 5; /*reloc */
2194     desc->desc4.constant_urb_entry_read_offset = 0;
2195     desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
2196
2197     dri_bo_emit_reloc(bo,
2198                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2199                       0,
2200                       offsetof(struct gen6_interface_descriptor_data, desc0),
2201                       pp_modules[pp_index].bo);
2202
2203     dri_bo_emit_reloc(bo,
2204                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2205                       desc->desc2.sampler_count << 2,
2206                       offsetof(struct gen6_interface_descriptor_data, desc2),
2207                       pp_context->sampler_state_table.bo);
2208
2209     dri_bo_emit_reloc(bo,
2210                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2211                       desc->desc3.binding_table_entry_count,
2212                       offsetof(struct gen6_interface_descriptor_data, desc3),
2213                       pp_context->binding_table.bo);
2214
2215     dri_bo_unmap(bo);
2216     pp_context->idrt.num_interface_descriptors++;
2217 }
2218
2219 static void
2220 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
2221 {
2222     unsigned char *constant_buffer;
2223
2224     assert(sizeof(pp_static_parameter) == 128);
2225     dri_bo_map(pp_context->curbe.bo, 1);
2226     assert(pp_context->curbe.bo->virtual);
2227     constant_buffer = pp_context->curbe.bo->virtual;
2228     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
2229     dri_bo_unmap(pp_context->curbe.bo);
2230 }
2231
2232 static void
2233 gen6_pp_states_setup(VADriverContextP ctx)
2234 {
2235     struct i965_driver_data *i965 = i965_driver_data(ctx);
2236     struct i965_post_processing_context *pp_context = i965->pp_context;
2237
2238     gen6_pp_binding_table(pp_context);
2239     gen6_pp_interface_descriptor_table(pp_context);
2240     gen6_pp_upload_constants(pp_context);
2241 }
2242
2243 static void
2244 gen6_pp_pipeline_select(VADriverContextP ctx)
2245 {
2246     BEGIN_BATCH(ctx, 1);
2247     OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
2248     ADVANCE_BATCH(ctx);
2249 }
2250
2251 static void
2252 gen6_pp_state_base_address(VADriverContextP ctx)
2253 {
2254     BEGIN_BATCH(ctx, 10);
2255     OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | (10 - 2));
2256     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2257     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2258     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2259     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2260     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2261     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2262     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2263     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2264     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
2265     ADVANCE_BATCH(ctx);
2266 }
2267
2268 static void
2269 gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2270 {
2271     BEGIN_BATCH(ctx, 8);
2272     OUT_BATCH(ctx, CMD_MEDIA_VFE_STATE | (8 - 2));
2273     OUT_BATCH(ctx, 0);
2274     OUT_BATCH(ctx,
2275               (pp_context->urb.num_vfe_entries - 1) << 16 |
2276               pp_context->urb.num_vfe_entries << 8);
2277     OUT_BATCH(ctx, 0);
2278     OUT_BATCH(ctx,
2279               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
2280               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
2281     OUT_BATCH(ctx, 0);
2282     OUT_BATCH(ctx, 0);
2283     OUT_BATCH(ctx, 0);
2284     ADVANCE_BATCH(ctx);
2285 }
2286
2287 static void
2288 gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2289 {
2290     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);
2291
2292     BEGIN_BATCH(ctx, 4);
2293     OUT_BATCH(ctx, CMD_MEDIA_CURBE_LOAD | (4 - 2));
2294     OUT_BATCH(ctx, 0);
2295     OUT_BATCH(ctx,
2296               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
2297     OUT_RELOC(ctx, 
2298               pp_context->curbe.bo,
2299               I915_GEM_DOMAIN_INSTRUCTION, 0,
2300               0);
2301     ADVANCE_BATCH(ctx);
2302 }
2303
2304 static void
2305 gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2306 {
2307     BEGIN_BATCH(ctx, 4);
2308     OUT_BATCH(ctx, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
2309     OUT_BATCH(ctx, 0);
2310     OUT_BATCH(ctx,
2311               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
2312     OUT_RELOC(ctx, 
2313               pp_context->idrt.bo,
2314               I915_GEM_DOMAIN_INSTRUCTION, 0,
2315               0);
2316     ADVANCE_BATCH(ctx);
2317 }
2318
2319 static void
2320 gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2321 {
2322     int x, x_steps, y, y_steps;
2323
2324     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
2325     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
2326
2327     for (y = 0; y < y_steps; y++) {
2328         for (x = 0; x < x_steps; x++) {
2329             if (!pp_context->pp_set_block_parameter(&pp_context->private_context, x, y)) {
2330                 BEGIN_BATCH(ctx, 22);
2331                 OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 20);
2332                 OUT_BATCH(ctx, 0);
2333                 OUT_BATCH(ctx, 0); /* no indirect data */
2334                 OUT_BATCH(ctx, 0);
2335                 OUT_BATCH(ctx, 0); /* scoreboard */
2336                 OUT_BATCH(ctx, 0);
2337
2338                 /* inline data grf 5-6 */
2339                 assert(sizeof(pp_inline_parameter) == 64);
2340                 intel_batchbuffer_data(ctx, &pp_inline_parameter, sizeof(pp_inline_parameter));
2341
2342                 ADVANCE_BATCH(ctx);
2343             }
2344         }
2345     }
2346 }
2347
2348 static void
2349 gen6_pp_pipeline_setup(VADriverContextP ctx)
2350 {
2351     struct i965_driver_data *i965 = i965_driver_data(ctx);
2352     struct i965_post_processing_context *pp_context = i965->pp_context;
2353
2354     intel_batchbuffer_start_atomic(ctx, 0x1000);
2355     intel_batchbuffer_emit_mi_flush(ctx);
2356     gen6_pp_pipeline_select(ctx);
2357     gen6_pp_curbe_load(ctx, pp_context);
2358     gen6_interface_descriptor_load(ctx, pp_context);
2359     gen6_pp_state_base_address(ctx);
2360     gen6_pp_vfe_state(ctx, pp_context);
2361     gen6_pp_object_walker(ctx, pp_context);
2362     intel_batchbuffer_end_atomic(ctx);
2363 }
2364
2365 static void
2366 gen6_post_processing(VADriverContextP ctx,
2367                      VASurfaceID surface,
2368                      int input,
2369                      short srcx,
2370                      short srcy,
2371                      unsigned short srcw,
2372                      unsigned short srch,
2373                      short destx,
2374                      short desty,
2375                      unsigned short destw,
2376                      unsigned short desth,
2377                      int pp_index)
2378 {
2379     gen6_pp_initialize(ctx, surface, input,
2380                        srcx, srcy, srcw, srch,
2381                        destx, desty, destw, desth,
2382                        pp_index);
2383     gen6_pp_states_setup(ctx);
2384     gen6_pp_pipeline_setup(ctx);
2385 }
2386
2387 static void
2388 i965_post_processing_internal(VADriverContextP ctx,
2389                               VASurfaceID surface,
2390                               int input,
2391                               short srcx,
2392                               short srcy,
2393                               unsigned short srcw,
2394                               unsigned short srch,
2395                               short destx,
2396                               short desty,
2397                               unsigned short destw,
2398                               unsigned short desth,
2399                               int pp_index)
2400 {
2401     struct i965_driver_data *i965 = i965_driver_data(ctx);
2402
2403     if (IS_GEN6(i965->intel.device_id))
2404         gen6_post_processing(ctx, surface, input,
2405                              srcx, srcy, srcw, srch,
2406                              destx, desty, destw, desth,
2407                              pp_index);
2408     else
2409         ironlake_post_processing(ctx, surface, input,
2410                                  srcx, srcy, srcw, srch,
2411                                  destx, desty, destw, desth,
2412                                  pp_index);
2413 }
2414
2415 void
2416 i965_post_processing(VADriverContextP ctx,
2417                      VASurfaceID surface,
2418                      short srcx,
2419                      short srcy,
2420                      unsigned short srcw,
2421                      unsigned short srch,
2422                      short destx,
2423                      short desty,
2424                      unsigned short destw,
2425                      unsigned short desth,
2426                      unsigned int flag)
2427 {
2428     struct i965_driver_data *i965 = i965_driver_data(ctx);
2429
2430     if (HAS_PP(i965)) {
2431         /* Currently only support post processing for NV12 surface */
2432         if (i965->render_state.interleaved_uv) {
2433             int internal_input = 0;
2434
2435             if (flag & I965_PP_FLAG_DEINTERLACING) {
2436                 i965_post_processing_internal(ctx, surface, internal_input,
2437                                               srcx, srcy, srcw, srch,
2438                                               destx, desty, destw, desth,
2439                                               PP_NV12_DNDI);
2440                 internal_input = 1;
2441             }
2442
2443             if (flag & I965_PP_FLAG_AVS) {
2444                 i965_post_processing_internal(ctx, surface, internal_input,
2445                                               srcx, srcy, srcw, srch,
2446                                               destx, desty, destw, desth,
2447                                               PP_NV12_AVS);
2448             }
2449         }
2450     }
2451 }       
2452
2453 Bool
2454 i965_post_processing_terminate(VADriverContextP ctx)
2455 {
2456     struct i965_driver_data *i965 = i965_driver_data(ctx);
2457     struct i965_post_processing_context *pp_context = i965->pp_context;
2458     int i;
2459
2460     if (HAS_PP(i965)) {
2461         if (pp_context) {
2462             dri_bo_unreference(pp_context->curbe.bo);
2463             pp_context->curbe.bo = NULL;
2464
2465             for (i = 0; i < MAX_PP_SURFACES; i++) {
2466                 dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2467                 pp_context->surfaces[i].ss_bo = NULL;
2468
2469                 dri_bo_unreference(pp_context->surfaces[i].s_bo);
2470                 pp_context->surfaces[i].s_bo = NULL;
2471             }
2472
2473             dri_bo_unreference(pp_context->sampler_state_table.bo);
2474             pp_context->sampler_state_table.bo = NULL;
2475
2476             dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2477             pp_context->sampler_state_table.bo_8x8 = NULL;
2478
2479             dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2480             pp_context->sampler_state_table.bo_8x8_uv = NULL;
2481
2482             dri_bo_unreference(pp_context->binding_table.bo);
2483             pp_context->binding_table.bo = NULL;
2484
2485             dri_bo_unreference(pp_context->idrt.bo);
2486             pp_context->idrt.bo = NULL;
2487             pp_context->idrt.num_interface_descriptors = 0;
2488
2489             dri_bo_unreference(pp_context->vfe_state.bo);
2490             pp_context->vfe_state.bo = NULL;
2491
2492             dri_bo_unreference(pp_context->stmm.bo);
2493             pp_context->stmm.bo = NULL;
2494
2495             free(pp_context);
2496         }
2497
2498         i965->pp_context = NULL;
2499
2500         for (i = 0; i < NUM_PP_MODULES && pp_modules; i++) {
2501             struct pp_module *pp_module = &pp_modules[i];
2502
2503             dri_bo_unreference(pp_module->bo);
2504             pp_module->bo = NULL;
2505         }
2506     }
2507
2508     return True;
2509 }
2510
2511 Bool
2512 i965_post_processing_init(VADriverContextP ctx)
2513 {
2514     struct i965_driver_data *i965 = i965_driver_data(ctx);
2515     struct i965_post_processing_context *pp_context = i965->pp_context;
2516     int i;
2517
2518     if (HAS_PP(i965)) {
2519         if (pp_context == NULL) {
2520             pp_context = calloc(1, sizeof(*pp_context));
2521             i965->pp_context = pp_context;
2522         }
2523
2524         pp_context->urb.size = URB_SIZE((&i965->intel));
2525         pp_context->urb.num_vfe_entries = 32;
2526         pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
2527         pp_context->urb.num_cs_entries = 1;
2528         pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
2529         pp_context->urb.vfe_start = 0;
2530         pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2531             pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2532         assert(pp_context->urb.cs_start + 
2533                pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2534
2535         assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2536
2537         if (IS_GEN6(i965->intel.device_id))
2538             pp_modules = pp_modules_gen6;
2539         else if (IS_IRONLAKE(i965->intel.device_id)) {
2540             pp_modules = pp_modules_gen5;
2541         }
2542
2543         for (i = 0; i < NUM_PP_MODULES && pp_modules; i++) {
2544             struct pp_module *pp_module = &pp_modules[i];
2545             dri_bo_unreference(pp_module->bo);
2546             pp_module->bo = dri_bo_alloc(i965->intel.bufmgr,
2547                                          pp_module->name,
2548                                          pp_module->size,
2549                                          4096);
2550             assert(pp_module->bo);
2551             dri_bo_subdata(pp_module->bo, 0, pp_module->size, pp_module->bin);
2552         }
2553     }
2554
2555     return True;
2556 }