Support DN only on Ironlake and Sandybridge
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
40 #include "i965_drv_video.h"
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
43
/*
 * True when the device id stored in ctx matches one of the IS_IRONLAKE,
 * IS_GEN6 or IS_GEN7 checks.
 */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))

/*
 * Both surface state layouts are padded to a multiple of 32 bytes; one slot
 * is sized to hold the larger of the two so either kind fits at any index.
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_I965
/*
 * Byte offset of surface state slot `index`. The argument is parenthesized
 * so that expression arguments such as `base + 1` scale as a whole.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* The binding table is placed right after the last surface state slot. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
55
/*
 * gen5 post-processing kernel tables. Each array is a list of rows of four
 * 32-bit words whose contents are pulled in from a file under
 * shaders/post_processing/gen5_6/ (presumably pre-assembled kernel
 * binaries -- confirm against the shader build). One array exists per
 * entry of pp_modules_gen5.
 */
56 static const uint32_t pp_null_gen5[][4] = {
57 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
58 };
59
60 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
61 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
62 };
63
64 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
65 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
66 };
67
68 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
69 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
70 };
71
72 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
73 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
74 };
75
76 static const uint32_t pp_nv12_scaling_gen5[][4] = {
77 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
78 };
79
80 static const uint32_t pp_nv12_avs_gen5[][4] = {
81 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
82 };
83
84 static const uint32_t pp_nv12_dndi_gen5[][4] = {
85 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
86 };
87
88 static const uint32_t pp_nv12_dn_gen5[][4] = {
89 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
90 };
91
92 static void pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
93                                const struct i965_surface *src_surface,
94                                const VARectangle *src_rect,
95                                const struct i965_surface *dst_surface,
96                                const VARectangle *dst_rect,
97                                void *filter_param);
98 static void pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
99                                    const struct i965_surface *src_surface,
100                                    const VARectangle *src_rect,
101                                    const struct i965_surface *dst_surface,
102                                    const VARectangle *dst_rect,
103                                    void *filter_param);
104 static void pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
105                                        const struct i965_surface *src_surface,
106                                        const VARectangle *src_rect,
107                                        const struct i965_surface *dst_surface,
108                                        const VARectangle *dst_rect,
109                                        void *filter_param);
110 static void pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
111                                             const struct i965_surface *src_surface,
112                                             const VARectangle *src_rect,
113                                             const struct i965_surface *dst_surface,
114                                             const VARectangle *dst_rect,
115                                             void *filter_param);
116 static void pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
117                                     const struct i965_surface *src_surface,
118                                     const VARectangle *src_rect,
119                                     const struct i965_surface *dst_surface,
120                                     const VARectangle *dst_rect,
121                                     void *filter_param);
122 static void pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
123                                   const struct i965_surface *src_surface,
124                                   const VARectangle *src_rect,
125                                   const struct i965_surface *dst_surface,
126                                   const VARectangle *dst_rect,
127                                   void *filter_param);
128
129 static struct pp_module pp_modules_gen5[] = {
130     {
131         {
132             "NULL module (for testing)",
133             PP_NULL,
134             pp_null_gen5,
135             sizeof(pp_null_gen5),
136             NULL,
137         },
138
139         pp_null_initialize,
140     },
141
142     {
143         {
144             "NV12_NV12",
145             PP_NV12_LOAD_SAVE_N12,
146             pp_nv12_load_save_nv12_gen5,
147             sizeof(pp_nv12_load_save_nv12_gen5),
148             NULL,
149         },
150
151         pp_plx_load_save_plx_initialize,
152     },
153
154     {
155         {
156             "NV12_PL3",
157             PP_NV12_LOAD_SAVE_PL3,
158             pp_nv12_load_save_pl3_gen5,
159             sizeof(pp_nv12_load_save_pl3_gen5),
160             NULL,
161         },
162
163         pp_plx_load_save_plx_initialize,
164     },
165
166     {
167         {
168             "PL3_NV12",
169             PP_PL3_LOAD_SAVE_N12,
170             pp_pl3_load_save_nv12_gen5,
171             sizeof(pp_pl3_load_save_nv12_gen5),
172             NULL,
173         },
174
175         pp_plx_load_save_plx_initialize,
176     },
177
178     {
179         {
180             "PL3_PL3",
181             PP_PL3_LOAD_SAVE_N12,
182             pp_pl3_load_save_pl3_gen5,
183             sizeof(pp_pl3_load_save_pl3_gen5),
184             NULL,
185         },
186
187         pp_plx_load_save_plx_initialize
188     },
189
190     {
191         {
192             "NV12 Scaling module",
193             PP_NV12_SCALING,
194             pp_nv12_scaling_gen5,
195             sizeof(pp_nv12_scaling_gen5),
196             NULL,
197         },
198
199         pp_nv12_scaling_initialize,
200     },
201
202     {
203         {
204             "NV12 AVS module",
205             PP_NV12_AVS,
206             pp_nv12_avs_gen5,
207             sizeof(pp_nv12_avs_gen5),
208             NULL,
209         },
210
211         pp_nv12_avs_initialize,
212     },
213
214     {
215         {
216             "NV12 DNDI module",
217             PP_NV12_DNDI,
218             pp_nv12_dndi_gen5,
219             sizeof(pp_nv12_dndi_gen5),
220             NULL,
221         },
222
223         pp_nv12_dndi_initialize,
224     },
225
226     {
227         {
228             "NV12 DN module",
229             PP_NV12_DN,
230             pp_nv12_dn_gen5,
231             sizeof(pp_nv12_dn_gen5),
232             NULL,
233         },
234
235         pp_nv12_dn_initialize,
236     },
237 };
238
/*
 * gen6 post-processing kernel tables. Each array is a list of rows of four
 * 32-bit words whose contents are pulled in from a *.g6b file under
 * shaders/post_processing/gen5_6/ (presumably pre-assembled kernel
 * binaries -- confirm against the shader build). One array exists per
 * entry of pp_modules_gen6.
 */
239 static const uint32_t pp_null_gen6[][4] = {
240 #include "shaders/post_processing/gen5_6/null.g6b"
241 };
242
243 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
244 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
245 };
246
247 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
248 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
249 };
250
251 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
252 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
253 };
254
255 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
256 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
257 };
258
259 static const uint32_t pp_nv12_scaling_gen6[][4] = {
260 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g6b"
261 };
262
263 static const uint32_t pp_nv12_avs_gen6[][4] = {
264 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
265 };
266
267 static const uint32_t pp_nv12_dndi_gen6[][4] = {
268 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
269 };
270
271 static const uint32_t pp_nv12_dn_gen6[][4] = {
272 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
273 };
274
275 static struct pp_module pp_modules_gen6[] = {
276     {
277         {
278             "NULL module (for testing)",
279             PP_NULL,
280             pp_null_gen6,
281             sizeof(pp_null_gen6),
282             NULL,
283         },
284
285         pp_null_initialize,
286     },
287
288     {
289         {
290             "NV12_NV12",
291             PP_NV12_LOAD_SAVE_N12,
292             pp_nv12_load_save_nv12_gen6,
293             sizeof(pp_nv12_load_save_nv12_gen6),
294             NULL,
295         },
296
297         pp_plx_load_save_plx_initialize,
298     },
299
300     {
301         {
302             "NV12_PL3",
303             PP_NV12_LOAD_SAVE_PL3,
304             pp_nv12_load_save_pl3_gen6,
305             sizeof(pp_nv12_load_save_pl3_gen6),
306             NULL,
307         },
308         
309         pp_plx_load_save_plx_initialize,
310     },
311
312     {
313         {
314             "PL3_NV12",
315             PP_PL3_LOAD_SAVE_N12,
316             pp_pl3_load_save_nv12_gen6,
317             sizeof(pp_pl3_load_save_nv12_gen6),
318             NULL,
319         },
320
321         pp_plx_load_save_plx_initialize,
322     },
323
324     {
325         {
326             "PL3_PL3",
327             PP_PL3_LOAD_SAVE_N12,
328             pp_pl3_load_save_pl3_gen6,
329             sizeof(pp_pl3_load_save_pl3_gen6),
330             NULL,
331         },
332
333         pp_plx_load_save_plx_initialize,
334     },
335
336     {
337         {
338             "NV12 Scaling module",
339             PP_NV12_SCALING,
340             pp_nv12_scaling_gen6,
341             sizeof(pp_nv12_scaling_gen6),
342             NULL,
343         },
344
345         pp_nv12_scaling_initialize,
346     },
347
348     {
349         {
350             "NV12 AVS module",
351             PP_NV12_AVS,
352             pp_nv12_avs_gen6,
353             sizeof(pp_nv12_avs_gen6),
354             NULL,
355         },
356
357         pp_nv12_avs_initialize,
358     },
359
360     {
361         {
362             "NV12 DNDI module",
363             PP_NV12_DNDI,
364             pp_nv12_dndi_gen6,
365             sizeof(pp_nv12_dndi_gen6),
366             NULL,
367         },
368
369         pp_nv12_dndi_initialize,
370     },
371
372     {
373         {
374             "NV12 DN module",
375             PP_NV12_DN,
376             pp_nv12_dn_gen6,
377             sizeof(pp_nv12_dn_gen6),
378             NULL,
379         },
380
381         pp_nv12_dn_initialize,
382     },
383 };
384
/*
 * Shorthands for the two kernel parameter blocks kept in the
 * post-processing context. Both expand to a member of a variable literally
 * named `pp_context`, so they only work inside functions that have such a
 * variable in scope. The static block is what gets copied into the CURBE
 * buffer; the inline block is what gets appended to each media object
 * command.
 */
385 #define pp_static_parameter     pp_context->pp_static_parameter
386 #define pp_inline_parameter     pp_context->pp_inline_parameter
387
388 static int
389 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
390 {
391     struct i965_driver_data *i965 = i965_driver_data(ctx);
392     int fourcc;
393
394     if (surface->flag == I965_SURFACE_IMAGE) {
395         struct object_image *obj_image = IMAGE(surface->id);
396         fourcc = obj_image->image.format.fourcc;
397     } else {
398         struct object_surface *obj_surface = SURFACE(surface->id);
399         fourcc = obj_surface->fourcc;
400     }
401
402     return fourcc;
403 }
404
405 static void
406 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
407 {
408     switch (tiling) {
409     case I915_TILING_NONE:
410         ss->ss3.tiled_surface = 0;
411         ss->ss3.tile_walk = 0;
412         break;
413     case I915_TILING_X:
414         ss->ss3.tiled_surface = 1;
415         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
416         break;
417     case I915_TILING_Y:
418         ss->ss3.tiled_surface = 1;
419         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
420         break;
421     }
422 }
423
424 static void
425 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
426 {
427     switch (tiling) {
428     case I915_TILING_NONE:
429         ss->ss2.tiled_surface = 0;
430         ss->ss2.tile_walk = 0;
431         break;
432     case I915_TILING_X:
433         ss->ss2.tiled_surface = 1;
434         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
435         break;
436     case I915_TILING_Y:
437         ss->ss2.tiled_surface = 1;
438         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
439         break;
440     }
441 }
442
443 static void
444 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
445 {
446     struct i965_interface_descriptor *desc;
447     dri_bo *bo;
448     int pp_index = pp_context->current_pp;
449
450     bo = pp_context->idrt.bo;
451     dri_bo_map(bo, 1);
452     assert(bo->virtual);
453     desc = bo->virtual;
454     memset(desc, 0, sizeof(*desc));
455     desc->desc0.grf_reg_blocks = 10;
456     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
457     desc->desc1.const_urb_entry_read_offset = 0;
458     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
459     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
460     desc->desc2.sampler_count = 0;
461     desc->desc3.binding_table_entry_count = 0;
462     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
463
464     dri_bo_emit_reloc(bo,
465                       I915_GEM_DOMAIN_INSTRUCTION, 0,
466                       desc->desc0.grf_reg_blocks,
467                       offsetof(struct i965_interface_descriptor, desc0),
468                       pp_context->pp_modules[pp_index].kernel.bo);
469
470     dri_bo_emit_reloc(bo,
471                       I915_GEM_DOMAIN_INSTRUCTION, 0,
472                       desc->desc2.sampler_count << 2,
473                       offsetof(struct i965_interface_descriptor, desc2),
474                       pp_context->sampler_state_table.bo);
475
476     dri_bo_unmap(bo);
477     pp_context->idrt.num_interface_descriptors++;
478 }
479
480 static void
481 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
482 {
483     struct i965_vfe_state *vfe_state;
484     dri_bo *bo;
485
486     bo = pp_context->vfe_state.bo;
487     dri_bo_map(bo, 1);
488     assert(bo->virtual);
489     vfe_state = bo->virtual;
490     memset(vfe_state, 0, sizeof(*vfe_state));
491     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
492     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
493     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
494     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
495     vfe_state->vfe1.children_present = 0;
496     vfe_state->vfe2.interface_descriptor_base = 
497         pp_context->idrt.bo->offset >> 4; /* reloc */
498     dri_bo_emit_reloc(bo,
499                       I915_GEM_DOMAIN_INSTRUCTION, 0,
500                       0,
501                       offsetof(struct i965_vfe_state, vfe2),
502                       pp_context->idrt.bo);
503     dri_bo_unmap(bo);
504 }
505
506 static void
507 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
508 {
509     unsigned char *constant_buffer;
510
511     assert(sizeof(pp_static_parameter) == 128);
512     dri_bo_map(pp_context->curbe.bo, 1);
513     assert(pp_context->curbe.bo->virtual);
514     constant_buffer = pp_context->curbe.bo->virtual;
515     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
516     dri_bo_unmap(pp_context->curbe.bo);
517 }
518
519 static void
520 ironlake_pp_states_setup(VADriverContextP ctx,
521                          struct i965_post_processing_context *pp_context)
522 {
523     ironlake_pp_interface_descriptor_table(pp_context);
524     ironlake_pp_vfe_state(pp_context);
525     ironlake_pp_upload_constants(pp_context);
526 }
527
528 static void
529 ironlake_pp_pipeline_select(VADriverContextP ctx,
530                             struct i965_post_processing_context *pp_context)
531 {
532     struct intel_batchbuffer *batch = pp_context->batch;
533
534     BEGIN_BATCH(batch, 1);
535     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
536     ADVANCE_BATCH(batch);
537 }
538
539 static void
540 ironlake_pp_urb_layout(VADriverContextP ctx,
541                        struct i965_post_processing_context *pp_context)
542 {
543     struct intel_batchbuffer *batch = pp_context->batch;
544     unsigned int vfe_fence, cs_fence;
545
546     vfe_fence = pp_context->urb.cs_start;
547     cs_fence = pp_context->urb.size;
548
549     BEGIN_BATCH(batch, 3);
550     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
551     OUT_BATCH(batch, 0);
552     OUT_BATCH(batch, 
553               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
554               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
555     ADVANCE_BATCH(batch);
556 }
557
558 static void
559 ironlake_pp_state_base_address(VADriverContextP ctx,
560                                struct i965_post_processing_context *pp_context)
561 {
562     struct intel_batchbuffer *batch = pp_context->batch;
563
564     BEGIN_BATCH(batch, 8);
565     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
566     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
567     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
568     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
569     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
570     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
571     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
572     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
573     ADVANCE_BATCH(batch);
574 }
575
576 static void
577 ironlake_pp_state_pointers(VADriverContextP ctx,
578                            struct i965_post_processing_context *pp_context)
579 {
580     struct intel_batchbuffer *batch = pp_context->batch;
581
582     BEGIN_BATCH(batch, 3);
583     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
584     OUT_BATCH(batch, 0);
585     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
586     ADVANCE_BATCH(batch);
587 }
588
589 static void 
590 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
591                           struct i965_post_processing_context *pp_context)
592 {
593     struct intel_batchbuffer *batch = pp_context->batch;
594
595     BEGIN_BATCH(batch, 2);
596     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
597     OUT_BATCH(batch,
598               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
599               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
600     ADVANCE_BATCH(batch);
601 }
602
603 static void
604 ironlake_pp_constant_buffer(VADriverContextP ctx,
605                             struct i965_post_processing_context *pp_context)
606 {
607     struct intel_batchbuffer *batch = pp_context->batch;
608
609     BEGIN_BATCH(batch, 2);
610     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
611     OUT_RELOC(batch, pp_context->curbe.bo,
612               I915_GEM_DOMAIN_INSTRUCTION, 0,
613               pp_context->urb.size_cs_entry - 1);
614     ADVANCE_BATCH(batch);    
615 }
616
617 static void
618 ironlake_pp_object_walker(VADriverContextP ctx,
619                           struct i965_post_processing_context *pp_context)
620 {
621     struct intel_batchbuffer *batch = pp_context->batch;
622     int x, x_steps, y, y_steps;
623
624     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
625     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
626
627     for (y = 0; y < y_steps; y++) {
628         for (x = 0; x < x_steps; x++) {
629             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
630                 BEGIN_BATCH(batch, 20);
631                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
632                 OUT_BATCH(batch, 0);
633                 OUT_BATCH(batch, 0); /* no indirect data */
634                 OUT_BATCH(batch, 0);
635
636                 /* inline data grf 5-6 */
637                 assert(sizeof(pp_inline_parameter) == 64);
638                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
639
640                 ADVANCE_BATCH(batch);
641             }
642         }
643     }
644 }
645
646 static void
647 ironlake_pp_pipeline_setup(VADriverContextP ctx,
648                            struct i965_post_processing_context *pp_context)
649 {
650     struct intel_batchbuffer *batch = pp_context->batch;
651
652     intel_batchbuffer_start_atomic(batch, 0x1000);
653     intel_batchbuffer_emit_mi_flush(batch);
654     ironlake_pp_pipeline_select(ctx, pp_context);
655     ironlake_pp_state_base_address(ctx, pp_context);
656     ironlake_pp_state_pointers(ctx, pp_context);
657     ironlake_pp_urb_layout(ctx, pp_context);
658     ironlake_pp_cs_urb_layout(ctx, pp_context);
659     ironlake_pp_constant_buffer(ctx, pp_context);
660     ironlake_pp_object_walker(ctx, pp_context);
661     intel_batchbuffer_end_atomic(batch);
662 }
663
664 static void
665 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
666                           dri_bo *surf_bo, unsigned long surf_bo_offset,
667                           int width, int height, int pitch, int format, 
668                           int index, int is_target)
669 {
670     struct i965_surface_state *ss;
671     dri_bo *ss_bo;
672     unsigned int tiling;
673     unsigned int swizzle;
674
675     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
676     ss_bo = pp_context->surface_state_binding_table.bo;
677     assert(ss_bo);
678
679     dri_bo_map(ss_bo, True);
680     assert(ss_bo->virtual);
681     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
682     memset(ss, 0, sizeof(*ss));
683     ss->ss0.surface_type = I965_SURFACE_2D;
684     ss->ss0.surface_format = format;
685     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
686     ss->ss2.width = width - 1;
687     ss->ss2.height = height - 1;
688     ss->ss3.pitch = pitch - 1;
689     pp_set_surface_tiling(ss, tiling);
690     dri_bo_emit_reloc(ss_bo,
691                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
692                       surf_bo_offset,
693                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
694                       surf_bo);
695     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
696     dri_bo_unmap(ss_bo);
697 }
698
699 static void
700 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
701                            dri_bo *surf_bo, unsigned long surf_bo_offset,
702                            int width, int height, int wpitch,
703                            int xoffset, int yoffset,
704                            int format, int interleave_chroma,
705                            int index)
706 {
707     struct i965_surface_state2 *ss2;
708     dri_bo *ss2_bo;
709     unsigned int tiling;
710     unsigned int swizzle;
711
712     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
713     ss2_bo = pp_context->surface_state_binding_table.bo;
714     assert(ss2_bo);
715
716     dri_bo_map(ss2_bo, True);
717     assert(ss2_bo->virtual);
718     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
719     memset(ss2, 0, sizeof(*ss2));
720     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
721     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
722     ss2->ss1.width = width - 1;
723     ss2->ss1.height = height - 1;
724     ss2->ss2.pitch = wpitch - 1;
725     ss2->ss2.interleave_chroma = interleave_chroma;
726     ss2->ss2.surface_format = format;
727     ss2->ss3.x_offset_for_cb = xoffset;
728     ss2->ss3.y_offset_for_cb = yoffset;
729     pp_set_surface2_tiling(ss2, tiling);
730     dri_bo_emit_reloc(ss2_bo,
731                       I915_GEM_DOMAIN_RENDER, 0,
732                       surf_bo_offset,
733                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
734                       surf_bo);
735     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
736     dri_bo_unmap(ss2_bo);
737 }
738
739 static void 
740 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
741                                 const struct i965_surface *surface, 
742                                 int base_index, int is_target,
743                                 int *width, int *height, int *pitch, int *offset)
744 {
745     struct i965_driver_data *i965 = i965_driver_data(ctx);
746     struct object_surface *obj_surface;
747     struct object_image *obj_image;
748     dri_bo *bo;
749     int fourcc = pp_get_surface_fourcc(ctx, surface);
750     const int Y = 0;
751     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
752     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
753     const int UV = 1;
754     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
755
756     if (surface->flag == I965_SURFACE_SURFACE) {
757         obj_surface = SURFACE(surface->id);
758         bo = obj_surface->bo;
759         width[0] = obj_surface->orig_width;
760         height[0] = obj_surface->orig_height;
761         pitch[0] = obj_surface->width;
762         offset[0] = 0;
763
764         if (interleaved_uv) {
765             width[1] = obj_surface->orig_width;
766             height[1] = obj_surface->orig_height / 2;
767             pitch[1] = obj_surface->width;
768             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
769         } else {
770             width[1] = obj_surface->orig_width / 2;
771             height[1] = obj_surface->orig_height / 2;
772             pitch[1] = obj_surface->width / 2;
773             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
774             width[2] = obj_surface->orig_width / 2;
775             height[2] = obj_surface->orig_height / 2;
776             pitch[2] = obj_surface->width / 2;
777             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
778         }
779     } else {
780         obj_image = IMAGE(surface->id);
781         bo = obj_image->bo;
782         width[0] = obj_image->image.width;
783         height[0] = obj_image->image.height;
784         pitch[0] = obj_image->image.pitches[0];
785         offset[0] = obj_image->image.offsets[0];
786
787         if (interleaved_uv) {
788             width[1] = obj_image->image.width;
789             height[1] = obj_image->image.height / 2;
790             pitch[1] = obj_image->image.pitches[1];
791             offset[1] = obj_image->image.offsets[1];
792         } else {
793             width[1] = obj_image->image.width / 2;
794             height[1] = obj_image->image.height / 2;
795             pitch[1] = obj_image->image.pitches[1];
796             offset[1] = obj_image->image.offsets[1];
797             width[2] = obj_image->image.width / 2;
798             height[2] = obj_image->image.height / 2;
799             pitch[2] = obj_image->image.pitches[2];
800             offset[2] = obj_image->image.offsets[2];
801         }
802     }
803
804     /* Y surface */
805     i965_pp_set_surface_state(ctx, pp_context,
806                               bo, offset[Y],
807                               width[Y] / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
808                               base_index, is_target);
809
810     if (interleaved_uv) {
811         i965_pp_set_surface_state(ctx, pp_context,
812                                   bo, offset[UV],
813                                   width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
814                                   base_index + 1, is_target);
815     } else {
816         /* U surface */
817         i965_pp_set_surface_state(ctx, pp_context,
818                                   bo, offset[U],
819                                   width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
820                                   base_index + 1, is_target);
821
822         /* V surface */
823         i965_pp_set_surface_state(ctx, pp_context,
824                                   bo, offset[V],
825                                   width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
826                                   base_index + 2, is_target);
827     }
828
829 }
830
/*
 * Horizontal step count for the NULL module: always a single step.
 * private_context is ignored.
 */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
836
/*
 * Vertical step count for the NULL module: always a single step.
 * private_context is ignored.
 */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
842
/*
 * Per-block hook for the NULL module: there is nothing to set up, so it
 * always returns 0 (the object walker emits a block when this hook
 * returns 0). All arguments are ignored.
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
848
849 static void
850 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
851                    const struct i965_surface *src_surface,
852                    const VARectangle *src_rect,
853                    const struct i965_surface *dst_surface,
854                    const VARectangle *dst_rect,
855                    void *filter_param)
856 {
857     /* private function & data */
858     pp_context->pp_x_steps = pp_null_x_steps;
859     pp_context->pp_y_steps = pp_null_y_steps;
860     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
861 }
862
/*
 * Horizontal step count for the load/save modules: always a single step
 * per row. private_context is ignored.
 */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
868
869 static int
870 pp_load_save_y_steps(void *private_context)
871 {
872     struct pp_load_save_context *pp_load_save_context = private_context;
873
874     return pp_load_save_context->dest_h / 8;
875 }
876
877 static int
878 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
879 {
880     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
881     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
882     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
883     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
884
885     return 0;
886 }
887
888 static void
889 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
890                                 const struct i965_surface *src_surface,
891                                 const VARectangle *src_rect,
892                                 const struct i965_surface *dst_surface,
893                                 const VARectangle *dst_rect,
894                                 void *filter_param)
895 {
896     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
897     int width[3], height[3], pitch[3], offset[3];
898     const int Y = 0;
899
900     /* source surface */
901     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
902                                     width, height, pitch, offset);
903
904     /* destination surface */
905     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
906                                     width, height, pitch, offset);
907
908     /* private function & data */
909     pp_context->pp_x_steps = pp_load_save_x_steps;
910     pp_context->pp_y_steps = pp_load_save_y_steps;
911     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
912     pp_load_save_context->dest_h = ALIGN(height[Y], 16);
913     pp_load_save_context->dest_w = ALIGN(width[Y], 16);
914
915     pp_inline_parameter.grf5.block_count_x = ALIGN(width[Y], 16) / 16;   /* 1 x N */
916     pp_inline_parameter.grf5.number_blocks = ALIGN(width[Y], 16) / 16;
917 }
918
/*
 * Horizontal step count of the bilinear scaling module: always a single
 * step.  private_context is not consulted.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
924
925 static int
926 pp_scaling_y_steps(void *private_context)
927 {
928     struct pp_scaling_context *pp_scaling_context = private_context;
929
930     return pp_scaling_context->dest_h / 8;
931 }
932
933 static int
934 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
935 {
936     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
937     float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
938     float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
939
940     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
941     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
942     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
943     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
944     
945     return 0;
946 }
947
948 static void
949 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
950                            const struct i965_surface *src_surface,
951                            const VARectangle *src_rect,
952                            const struct i965_surface *dst_surface,
953                            const VARectangle *dst_rect,
954                            void *filter_param)
955 {
956     struct i965_driver_data *i965 = i965_driver_data(ctx);
957     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
958     struct object_surface *obj_surface;
959     struct i965_sampler_state *sampler_state;
960     int in_w, in_h, in_wpitch, in_hpitch;
961     int out_w, out_h, out_wpitch, out_hpitch;
962
963     /* source surface */
964     obj_surface = SURFACE(src_surface->id);
965     in_w = obj_surface->orig_width;
966     in_h = obj_surface->orig_height;
967     in_wpitch = obj_surface->width;
968     in_hpitch = obj_surface->height;
969
970     /* source Y surface index 1 */
971     i965_pp_set_surface_state(ctx, pp_context,
972                               obj_surface->bo, 0,
973                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
974                               1, 0);
975
976     /* source UV surface index 2 */
977     i965_pp_set_surface_state(ctx, pp_context,
978                               obj_surface->bo, in_wpitch * in_hpitch,
979                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
980                               2, 0);
981
982     /* destination surface */
983     obj_surface = SURFACE(dst_surface->id);
984     out_w = obj_surface->orig_width;
985     out_h = obj_surface->orig_height;
986     out_wpitch = obj_surface->width;
987     out_hpitch = obj_surface->height;
988
989     /* destination Y surface index 7 */
990     i965_pp_set_surface_state(ctx, pp_context,
991                               obj_surface->bo, 0,
992                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
993                               7, 1);
994
995     /* destination UV surface index 8 */
996     i965_pp_set_surface_state(ctx, pp_context,
997                               obj_surface->bo, out_wpitch * out_hpitch,
998                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
999                               8, 1);
1000
1001     /* sampler state */
1002     dri_bo_map(pp_context->sampler_state_table.bo, True);
1003     assert(pp_context->sampler_state_table.bo->virtual);
1004     sampler_state = pp_context->sampler_state_table.bo->virtual;
1005
1006     /* SIMD16 Y index 1 */
1007     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1008     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1009     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1010     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1011     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1012
1013     /* SIMD16 UV index 2 */
1014     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1015     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1016     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1017     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1018     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1019
1020     dri_bo_unmap(pp_context->sampler_state_table.bo);
1021
1022     /* private function & data */
1023     pp_context->pp_x_steps = pp_scaling_x_steps;
1024     pp_context->pp_y_steps = pp_scaling_y_steps;
1025     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1026
1027     pp_scaling_context->dest_x = dst_rect->x;
1028     pp_scaling_context->dest_y = dst_rect->y;
1029     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
1030     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
1031     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
1032     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
1033
1034     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
1035
1036     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
1037     pp_inline_parameter.grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1038     pp_inline_parameter.grf5.number_blocks = pp_scaling_context->dest_w / 16;
1039     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1040     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1041 }
1042
1043 static int
1044 pp_avs_x_steps(void *private_context)
1045 {
1046     struct pp_avs_context *pp_avs_context = private_context;
1047
1048     return pp_avs_context->dest_w / 16;
1049 }
1050
/*
 * Vertical step count of the AVS module: always a single step.
 * private_context is not consulted.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1056
1057 static int
1058 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1059 {
1060     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1061     float src_x_steping, src_y_steping, video_step_delta;
1062     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1063
1064     if (tmp_w >= pp_avs_context->dest_w) {
1065         pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1066         pp_inline_parameter.grf6.video_step_delta = 0;
1067         
1068         if (x == 0) {
1069             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1070                 pp_avs_context->src_normalized_x;
1071         } else {
1072             src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1073             video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1074             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1075                 16 * 15 * video_step_delta / 2;
1076         }
1077     } else {
1078         int n0, n1, n2, nls_left, nls_right;
1079         int factor_a = 5, factor_b = 4;
1080         float f;
1081
1082         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1083         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1084         n2 = tmp_w / (16 * factor_a);
1085         nls_left = n0 + n2;
1086         nls_right = n1 + n2;
1087         f = (float) n2 * 16 / tmp_w;
1088         
1089         if (n0 < 5) {
1090             pp_inline_parameter.grf6.video_step_delta = 0.0;
1091
1092             if (x == 0) {
1093                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1094                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1095             } else {
1096                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1097                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1098                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1099                     16 * 15 * video_step_delta / 2;
1100             }
1101         } else {
1102             if (x < nls_left) {
1103                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1104                 float a = f / (nls_left * 16 * factor_b);
1105                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1106                 
1107                 pp_inline_parameter.grf6.video_step_delta = b;
1108
1109                 if (x == 0) {
1110                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1111                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
1112                 } else {
1113                     src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1114                     video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1115                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1116                         16 * 15 * video_step_delta / 2;
1117                     pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
1118                 }
1119             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1120                 /* scale the center linearly */
1121                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1122                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1123                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1124                     16 * 15 * video_step_delta / 2;
1125                 pp_inline_parameter.grf6.video_step_delta = 0.0;
1126                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1127             } else {
1128                 float a = f / (nls_right * 16 * factor_b);
1129                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1130
1131                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1132                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1133                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1134                     16 * 15 * video_step_delta / 2;
1135                 pp_inline_parameter.grf6.video_step_delta = -b;
1136
1137                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1138                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1139                 else
1140                     pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
1141             }
1142         }
1143     }
1144
1145     src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
1146     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
1147     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1148     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
1149
1150     return 0;
1151 }
1152
1153 static void
1154 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1155                        const struct i965_surface *src_surface,
1156                        const VARectangle *src_rect,
1157                        const struct i965_surface *dst_surface,
1158                        const VARectangle *dst_rect,
1159                        void *filter_param)
1160 {
1161     struct i965_driver_data *i965 = i965_driver_data(ctx);
1162     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1163     struct object_surface *obj_surface;
1164     struct i965_sampler_8x8 *sampler_8x8;
1165     struct i965_sampler_8x8_state *sampler_8x8_state;
1166     int index;
1167     int in_w, in_h, in_wpitch, in_hpitch;
1168     int out_w, out_h, out_wpitch, out_hpitch;
1169
1170     /* surface */
1171     obj_surface = SURFACE(src_surface->id);
1172     in_w = obj_surface->orig_width;
1173     in_h = obj_surface->orig_height;
1174     in_wpitch = obj_surface->width;
1175     in_hpitch = obj_surface->height;
1176
1177     /* source Y surface index 1 */
1178     i965_pp_set_surface2_state(ctx, pp_context,
1179                                obj_surface->bo, 0,
1180                                in_w, in_h, in_wpitch,
1181                                0, 0,
1182                                SURFACE_FORMAT_Y8_UNORM, 0,
1183                                1);
1184
1185     /* source UV surface index 2 */
1186     i965_pp_set_surface2_state(ctx, pp_context,
1187                                obj_surface->bo, in_wpitch * in_hpitch,
1188                                in_w, in_h, in_wpitch,
1189                                0, 0,
1190                                SURFACE_FORMAT_PLANAR_420_8, 1,
1191                                2);
1192
1193     /* destination surface */
1194     obj_surface = SURFACE(dst_surface->id);
1195     out_w = obj_surface->orig_width;
1196     out_h = obj_surface->orig_height;
1197     out_wpitch = obj_surface->width;
1198     out_hpitch = obj_surface->height;
1199     assert(out_w <= out_wpitch && out_h <= out_hpitch);
1200
1201     /* destination Y surface index 7 */
1202     i965_pp_set_surface_state(ctx, pp_context,
1203                               obj_surface->bo, 0,
1204                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1205                               7, 1);
1206
1207     /* destination UV surface index 8 */
1208     i965_pp_set_surface_state(ctx, pp_context,
1209                               obj_surface->bo, out_wpitch * out_hpitch,
1210                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1211                               8, 1);
1212
1213     /* sampler 8x8 state */
1214     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1215     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1216     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1217     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1218     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1219     sampler_8x8_state->dw136.default_sharpness_level = 0;
1220     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1221     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1222     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1223     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1224
1225     /* sampler 8x8 */
1226     dri_bo_map(pp_context->sampler_state_table.bo, True);
1227     assert(pp_context->sampler_state_table.bo->virtual);
1228     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1229     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1230
1231     /* sample_8x8 Y index 1 */
1232     index = 1;
1233     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1234     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1235     sampler_8x8[index].dw0.ief_bypass = 0;
1236     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1237     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1238     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1239     sampler_8x8[index].dw2.global_noise_estimation = 22;
1240     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1241     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1242     sampler_8x8[index].dw3.strong_edge_weight = 7;
1243     sampler_8x8[index].dw3.regular_weight = 2;
1244     sampler_8x8[index].dw3.non_edge_weight = 0;
1245     sampler_8x8[index].dw3.gain_factor = 40;
1246     sampler_8x8[index].dw4.steepness_boost = 0;
1247     sampler_8x8[index].dw4.steepness_threshold = 0;
1248     sampler_8x8[index].dw4.mr_boost = 0;
1249     sampler_8x8[index].dw4.mr_threshold = 5;
1250     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1251     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1252     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1253     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1254     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1255     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1256     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1257     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1258     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1259     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1260     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1261     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1262     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1263     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1264     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1265     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1266     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1267     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1268     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1269     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1270     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1271     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1272     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1273     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1274     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1275     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1276     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1277     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1278     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1279     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1280     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1281     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1282     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1283     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1284     sampler_8x8[index].dw13.limiter_boost = 0;
1285     sampler_8x8[index].dw13.minimum_limiter = 10;
1286     sampler_8x8[index].dw13.maximum_limiter = 11;
1287     sampler_8x8[index].dw14.clip_limiter = 130;
1288     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1289                       I915_GEM_DOMAIN_RENDER, 
1290                       0,
1291                       0,
1292                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1293                       pp_context->sampler_state_table.bo_8x8);
1294
1295     dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
1296     assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
1297     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1298     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
1299     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1300     sampler_8x8_state->dw136.default_sharpness_level = 0;
1301     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
1302     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1303     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1304     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
1305
1306     /* sample_8x8 UV index 2 */
1307     index = 2;
1308     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1309     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
1310     sampler_8x8[index].dw0.ief_bypass = 0;
1311     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1312     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1313     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
1314     sampler_8x8[index].dw2.global_noise_estimation = 22;
1315     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1316     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1317     sampler_8x8[index].dw3.strong_edge_weight = 7;
1318     sampler_8x8[index].dw3.regular_weight = 2;
1319     sampler_8x8[index].dw3.non_edge_weight = 0;
1320     sampler_8x8[index].dw3.gain_factor = 40;
1321     sampler_8x8[index].dw4.steepness_boost = 0;
1322     sampler_8x8[index].dw4.steepness_threshold = 0;
1323     sampler_8x8[index].dw4.mr_boost = 0;
1324     sampler_8x8[index].dw4.mr_threshold = 5;
1325     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1326     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1327     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1328     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1329     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1330     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1331     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1332     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1333     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1334     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1335     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1336     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1337     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1338     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1339     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1340     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1341     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1342     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1343     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1344     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1345     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1346     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1347     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1348     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1349     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1350     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1351     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1352     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1353     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1354     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1355     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1356     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1357     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1358     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1359     sampler_8x8[index].dw13.limiter_boost = 0;
1360     sampler_8x8[index].dw13.minimum_limiter = 10;
1361     sampler_8x8[index].dw13.maximum_limiter = 11;
1362     sampler_8x8[index].dw14.clip_limiter = 130;
1363     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1364                       I915_GEM_DOMAIN_RENDER, 
1365                       0,
1366                       0,
1367                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1368                       pp_context->sampler_state_table.bo_8x8_uv);
1369
1370     dri_bo_unmap(pp_context->sampler_state_table.bo);
1371
1372     /* private function & data */
1373     pp_context->pp_x_steps = pp_avs_x_steps;
1374     pp_context->pp_y_steps = pp_avs_y_steps;
1375     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1376
1377     pp_avs_context->dest_x = dst_rect->x;
1378     pp_avs_context->dest_y = dst_rect->y;
1379     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
1380     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
1381     pp_avs_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
1382     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
1383     pp_avs_context->src_w = src_rect->width;
1384     pp_avs_context->src_h = src_rect->height;
1385
1386     pp_static_parameter.grf4.r4_2.avs.nlas = 1;
1387     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
1388
1389     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
1390     pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
1391     pp_inline_parameter.grf5.number_blocks = pp_avs_context->dest_h / 8;
1392     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1393     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1394     pp_inline_parameter.grf6.video_step_delta = 0.0;
1395 }
1396
/*
 * Horizontal step count of the DN/DI module: always a single step.
 * private_context is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1402
1403 static int
1404 pp_dndi_y_steps(void *private_context)
1405 {
1406     struct pp_dndi_context *pp_dndi_context = private_context;
1407
1408     return pp_dndi_context->dest_h / 4;
1409 }
1410
1411 static int
1412 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1413 {
1414     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1415     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1416
1417     return 0;
1418 }
1419
1420 static 
1421 void pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1422                              const struct i965_surface *src_surface,
1423                              const VARectangle *src_rect,
1424                              const struct i965_surface *dst_surface,
1425                              const VARectangle *dst_rect,
1426                              void *filter_param)
1427 {
1428     struct i965_driver_data *i965 = i965_driver_data(ctx);
1429     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1430     struct object_surface *obj_surface;
1431     struct i965_sampler_dndi *sampler_dndi;
1432     int index;
1433     int w, h;
1434     int orig_w, orig_h;
1435
1436     /* surface */
1437     obj_surface = SURFACE(src_surface->id);
1438     orig_w = obj_surface->orig_width;
1439     orig_h = obj_surface->orig_height;
1440     w = obj_surface->width;
1441     h = obj_surface->height;
1442
1443     if (pp_context->stmm.bo == NULL) {
1444         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1445                                            "STMM surface",
1446                                            w * h,
1447                                            4096);
1448         assert(pp_context->stmm.bo);
1449     }
1450
1451     /* source UV surface index 2 */
1452     i965_pp_set_surface_state(ctx, pp_context,
1453                               obj_surface->bo, w * h,
1454                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
1455                               2, 0);
1456
1457     /* source YUV surface index 4 */
1458     i965_pp_set_surface2_state(ctx, pp_context,
1459                                obj_surface->bo, 0,
1460                                orig_w, orig_w, w,
1461                                0, h,
1462                                SURFACE_FORMAT_PLANAR_420_8, 1,
1463                                4);
1464
1465     /* source STMM surface index 20 */
1466     i965_pp_set_surface_state(ctx, pp_context,
1467                               pp_context->stmm.bo, 0,
1468                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
1469                               20, 1);
1470
1471     /* destination surface */
1472     obj_surface = SURFACE(dst_surface->id);
1473     orig_w = obj_surface->orig_width;
1474     orig_h = obj_surface->orig_height;
1475     w = obj_surface->width;
1476     h = obj_surface->height;
1477
1478     /* destination Y surface index 7 */
1479     i965_pp_set_surface_state(ctx, pp_context,
1480                               obj_surface->bo, 0,
1481                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
1482                               7, 1);
1483
1484     /* destination UV surface index 8 */
1485     i965_pp_set_surface_state(ctx, pp_context,
1486                               obj_surface->bo, w * h,
1487                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
1488                               8, 1);
1489     /* sampler dndi */
1490     dri_bo_map(pp_context->sampler_state_table.bo, True);
1491     assert(pp_context->sampler_state_table.bo->virtual);
1492     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1493     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1494
1495     /* sample dndi index 1 */
1496     index = 0;
1497     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1498     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1499     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1500     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1501
1502     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1503     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1504     sampler_dndi[index].dw1.stmm_c2 = 0;
1505     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1506     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1507
1508     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
1509     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1510     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1511     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1512
1513     sampler_dndi[index].dw3.maximum_stmm = 128;
1514     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1515     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1516     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1517     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1518
1519     sampler_dndi[index].dw4.sdi_delta = 8;
1520     sampler_dndi[index].dw4.sdi_threshold = 128;
1521     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1522     sampler_dndi[index].dw4.stmm_shift_up = 0;
1523     sampler_dndi[index].dw4.stmm_shift_down = 0;
1524     sampler_dndi[index].dw4.minimum_stmm = 0;
1525
1526     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1527     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1528     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1529     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1530
1531     sampler_dndi[index].dw6.dn_enable = 1;
1532     sampler_dndi[index].dw6.di_enable = 1;
1533     sampler_dndi[index].dw6.di_partial = 0;
1534     sampler_dndi[index].dw6.dndi_top_first = 1;
1535     sampler_dndi[index].dw6.dndi_stream_id = 1;
1536     sampler_dndi[index].dw6.dndi_first_frame = 1;
1537     sampler_dndi[index].dw6.progressive_dn = 0;
1538     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1539     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1540     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1541
1542     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1543     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1544     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1545     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1546
1547     dri_bo_unmap(pp_context->sampler_state_table.bo);
1548
1549     /* private function & data */
1550     pp_context->pp_x_steps = pp_dndi_x_steps;
1551     pp_context->pp_y_steps = pp_dndi_y_steps;
1552     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1553
1554     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1555     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1556     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1557     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1558
1559     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1560     pp_inline_parameter.grf5.number_blocks = w / 16;
1561     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1562     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1563
1564     pp_dndi_context->dest_w = w;
1565     pp_dndi_context->dest_h = h;
1566 }
1567
/*
 * Horizontal step count for the NV12 denoise (DN) pass.
 *
 * The DN pass always reports a single horizontal step; the private
 * context is not consulted.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1573
1574 static int
1575 pp_dn_y_steps(void *private_context)
1576 {
1577     struct pp_dn_context *pp_dn_context = private_context;
1578
1579     return pp_dn_context->dest_h / 8;
1580 }
1581
1582 static int
1583 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1584 {
1585     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1586     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
1587
1588     return 0;
1589 }
1590
1591 static 
1592 void pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1593                            const struct i965_surface *src_surface,
1594                            const VARectangle *src_rect,
1595                            const struct i965_surface *dst_surface,
1596                            const VARectangle *dst_rect,
1597                            void *filter_param)
1598 {
1599     struct i965_driver_data *i965 = i965_driver_data(ctx);
1600     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
1601     struct object_surface *obj_surface;
1602     struct i965_sampler_dndi *sampler_dndi;
1603     VAProcFilterBaseParameterBuffer *dn_filter_param = filter_param;
1604     int index;
1605     int w, h;
1606     int orig_w, orig_h;
1607     int dn_strength = 15;
1608
1609     if (dn_filter_param) {
1610         int value = dn_filter_param->value;
1611         
1612         if (value > 1.0)
1613             value = 1.0;
1614         
1615         if (value < 0.0)
1616             value = 0.0;
1617
1618         dn_strength = (int)(value * 31.0F);
1619     }
1620
1621     /* surface */
1622     obj_surface = SURFACE(src_surface->id);
1623     orig_w = obj_surface->orig_width;
1624     orig_h = obj_surface->orig_height;
1625     w = obj_surface->width;
1626     h = obj_surface->height;
1627
1628     if (pp_context->stmm.bo == NULL) {
1629         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1630                                            "STMM surface",
1631                                            w * h,
1632                                            4096);
1633         assert(pp_context->stmm.bo);
1634     }
1635
1636     /* source UV surface index 2 */
1637     i965_pp_set_surface_state(ctx, pp_context,
1638                               obj_surface->bo, w * h,
1639                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
1640                               2, 0);
1641
1642     /* source YUV surface index 4 */
1643     i965_pp_set_surface2_state(ctx, pp_context,
1644                                obj_surface->bo, 0,
1645                                orig_w, orig_w, w,
1646                                0, h,
1647                                SURFACE_FORMAT_PLANAR_420_8, 1,
1648                                4);
1649
1650     /* source STMM surface index 20 */
1651     i965_pp_set_surface_state(ctx, pp_context,
1652                               pp_context->stmm.bo, 0,
1653                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
1654                               20, 1);
1655
1656     /* destination surface */
1657     obj_surface = SURFACE(dst_surface->id);
1658     orig_w = obj_surface->orig_width;
1659     orig_h = obj_surface->orig_height;
1660     w = obj_surface->width;
1661     h = obj_surface->height;
1662
1663     /* destination Y surface index 7 */
1664     i965_pp_set_surface_state(ctx, pp_context,
1665                               obj_surface->bo, 0,
1666                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
1667                               7, 1);
1668
1669     /* destination UV surface index 8 */
1670     i965_pp_set_surface_state(ctx, pp_context,
1671                               obj_surface->bo, w * h,
1672                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
1673                               8, 1);
1674     /* sampler dn */
1675     dri_bo_map(pp_context->sampler_state_table.bo, True);
1676     assert(pp_context->sampler_state_table.bo->virtual);
1677     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1678     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1679
1680     /* sample dndi index 1 */
1681     index = 0;
1682     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1683     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1684     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1685     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1686
1687     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1688     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1689     sampler_dndi[index].dw1.stmm_c2 = 0;
1690     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1691     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1692
1693     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
1694     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1695     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1696     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1697
1698     sampler_dndi[index].dw3.maximum_stmm = 128;
1699     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1700     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1701     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1702     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1703
1704     sampler_dndi[index].dw4.sdi_delta = 8;
1705     sampler_dndi[index].dw4.sdi_threshold = 128;
1706     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1707     sampler_dndi[index].dw4.stmm_shift_up = 0;
1708     sampler_dndi[index].dw4.stmm_shift_down = 0;
1709     sampler_dndi[index].dw4.minimum_stmm = 0;
1710
1711     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1712     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1713     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1714     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1715
1716     sampler_dndi[index].dw6.dn_enable = 1;
1717     sampler_dndi[index].dw6.di_enable = 0;
1718     sampler_dndi[index].dw6.di_partial = 0;
1719     sampler_dndi[index].dw6.dndi_top_first = 1;
1720     sampler_dndi[index].dw6.dndi_stream_id = 1;
1721     sampler_dndi[index].dw6.dndi_first_frame = 1;
1722     sampler_dndi[index].dw6.progressive_dn = 0;
1723     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1724     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1725     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1726
1727     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1728     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1729     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1730     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1731
1732     dri_bo_unmap(pp_context->sampler_state_table.bo);
1733
1734     /* private function & data */
1735     pp_context->pp_x_steps = pp_dn_x_steps;
1736     pp_context->pp_y_steps = pp_dn_y_steps;
1737     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
1738
1739     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1740     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1741     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1742     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1743
1744     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1745     pp_inline_parameter.grf5.number_blocks = w / 16;
1746     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1747     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1748
1749     pp_dn_context->dest_w = w;
1750     pp_dn_context->dest_h = h;
1751 }
1752
1753 static void
1754 ironlake_pp_initialize(
1755     VADriverContextP   ctx,
1756     struct i965_post_processing_context *pp_context,
1757     const struct i965_surface *src_surface,
1758     const VARectangle *src_rect,
1759     const struct i965_surface *dst_surface,
1760     const VARectangle *dst_rect,
1761     int                pp_index,
1762     void *filter_param
1763 )
1764 {
1765     struct i965_driver_data *i965 = i965_driver_data(ctx);
1766     struct pp_module *pp_module;
1767     dri_bo *bo;
1768
1769     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
1770     bo = dri_bo_alloc(i965->intel.bufmgr,
1771                       "surface state & binding table",
1772                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
1773                       4096);
1774     assert(bo);
1775     pp_context->surface_state_binding_table.bo = bo;
1776
1777     dri_bo_unreference(pp_context->curbe.bo);
1778     bo = dri_bo_alloc(i965->intel.bufmgr,
1779                       "constant buffer",
1780                       4096, 
1781                       4096);
1782     assert(bo);
1783     pp_context->curbe.bo = bo;
1784
1785     dri_bo_unreference(pp_context->idrt.bo);
1786     bo = dri_bo_alloc(i965->intel.bufmgr, 
1787                       "interface discriptor", 
1788                       sizeof(struct i965_interface_descriptor), 
1789                       4096);
1790     assert(bo);
1791     pp_context->idrt.bo = bo;
1792     pp_context->idrt.num_interface_descriptors = 0;
1793
1794     dri_bo_unreference(pp_context->sampler_state_table.bo);
1795     bo = dri_bo_alloc(i965->intel.bufmgr, 
1796                       "sampler state table", 
1797                       4096,
1798                       4096);
1799     assert(bo);
1800     dri_bo_map(bo, True);
1801     memset(bo->virtual, 0, bo->size);
1802     dri_bo_unmap(bo);
1803     pp_context->sampler_state_table.bo = bo;
1804
1805     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1806     bo = dri_bo_alloc(i965->intel.bufmgr, 
1807                       "sampler 8x8 state ",
1808                       4096,
1809                       4096);
1810     assert(bo);
1811     pp_context->sampler_state_table.bo_8x8 = bo;
1812
1813     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1814     bo = dri_bo_alloc(i965->intel.bufmgr, 
1815                       "sampler 8x8 state ",
1816                       4096,
1817                       4096);
1818     assert(bo);
1819     pp_context->sampler_state_table.bo_8x8_uv = bo;
1820
1821     dri_bo_unreference(pp_context->vfe_state.bo);
1822     bo = dri_bo_alloc(i965->intel.bufmgr, 
1823                       "vfe state", 
1824                       sizeof(struct i965_vfe_state), 
1825                       4096);
1826     assert(bo);
1827     pp_context->vfe_state.bo = bo;
1828     
1829     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1830     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1831     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1832     pp_context->current_pp = pp_index;
1833     pp_module = &pp_context->pp_modules[pp_index];
1834     
1835     if (pp_module->initialize)
1836         pp_module->initialize(ctx, pp_context,
1837                               src_surface,
1838                               src_rect,
1839                               dst_surface,
1840                               dst_rect,
1841                               filter_param);
1842 }
1843
1844 static void
1845 ironlake_post_processing(
1846     VADriverContextP   ctx,
1847     struct i965_post_processing_context *pp_context,
1848     const struct i965_surface *src_surface,
1849     const VARectangle *src_rect,
1850     const struct i965_surface *dst_surface,
1851     const VARectangle *dst_rect,
1852     int                pp_index,
1853     void *filter_param
1854 )
1855 {
1856     ironlake_pp_initialize(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
1857     ironlake_pp_states_setup(ctx, pp_context);
1858     ironlake_pp_pipeline_setup(ctx, pp_context);
1859 }
1860
1861 static void
1862 gen6_pp_initialize(
1863     VADriverContextP   ctx,
1864     struct i965_post_processing_context *pp_context,
1865     const struct i965_surface *src_surface,
1866     const VARectangle *src_rect,
1867     const struct i965_surface *dst_surface,
1868     const VARectangle *dst_rect,
1869     int                pp_index,
1870     void *filter_param
1871 )
1872 {
1873     struct i965_driver_data *i965 = i965_driver_data(ctx);
1874     struct pp_module *pp_module;
1875     dri_bo *bo;
1876
1877     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
1878     bo = dri_bo_alloc(i965->intel.bufmgr,
1879                       "surface state & binding table",
1880                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
1881                       4096);
1882     assert(bo);
1883     pp_context->surface_state_binding_table.bo = bo;
1884
1885     dri_bo_unreference(pp_context->curbe.bo);
1886     bo = dri_bo_alloc(i965->intel.bufmgr,
1887                       "constant buffer",
1888                       4096, 
1889                       4096);
1890     assert(bo);
1891     pp_context->curbe.bo = bo;
1892
1893     dri_bo_unreference(pp_context->idrt.bo);
1894     bo = dri_bo_alloc(i965->intel.bufmgr, 
1895                       "interface discriptor", 
1896                       sizeof(struct gen6_interface_descriptor_data), 
1897                       4096);
1898     assert(bo);
1899     pp_context->idrt.bo = bo;
1900     pp_context->idrt.num_interface_descriptors = 0;
1901
1902     dri_bo_unreference(pp_context->sampler_state_table.bo);
1903     bo = dri_bo_alloc(i965->intel.bufmgr, 
1904                       "sampler state table", 
1905                       4096,
1906                       4096);
1907     assert(bo);
1908     dri_bo_map(bo, True);
1909     memset(bo->virtual, 0, bo->size);
1910     dri_bo_unmap(bo);
1911     pp_context->sampler_state_table.bo = bo;
1912
1913     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1914     bo = dri_bo_alloc(i965->intel.bufmgr, 
1915                       "sampler 8x8 state ",
1916                       4096,
1917                       4096);
1918     assert(bo);
1919     pp_context->sampler_state_table.bo_8x8 = bo;
1920
1921     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1922     bo = dri_bo_alloc(i965->intel.bufmgr, 
1923                       "sampler 8x8 state ",
1924                       4096,
1925                       4096);
1926     assert(bo);
1927     pp_context->sampler_state_table.bo_8x8_uv = bo;
1928
1929     dri_bo_unreference(pp_context->vfe_state.bo);
1930     bo = dri_bo_alloc(i965->intel.bufmgr, 
1931                       "vfe state", 
1932                       sizeof(struct i965_vfe_state), 
1933                       4096);
1934     assert(bo);
1935     pp_context->vfe_state.bo = bo;
1936     
1937     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1938     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1939     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1940     pp_context->current_pp = pp_index;
1941     pp_module = &pp_context->pp_modules[pp_index];
1942     
1943     if (pp_module->initialize)
1944         pp_module->initialize(ctx, pp_context,
1945                               src_surface,
1946                               src_rect,
1947                               dst_surface,
1948                               dst_rect,
1949                               filter_param);
1950 }
1951
1952 static void
1953 gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1954 {
1955     struct gen6_interface_descriptor_data *desc;
1956     dri_bo *bo;
1957     int pp_index = pp_context->current_pp;
1958
1959     bo = pp_context->idrt.bo;
1960     dri_bo_map(bo, True);
1961     assert(bo->virtual);
1962     desc = bo->virtual;
1963     memset(desc, 0, sizeof(*desc));
1964     desc->desc0.kernel_start_pointer = 
1965         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1966     desc->desc1.single_program_flow = 1;
1967     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
1968     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
1969     desc->desc2.sampler_state_pointer = 
1970         pp_context->sampler_state_table.bo->offset >> 5;
1971     desc->desc3.binding_table_entry_count = 0;
1972     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1973     desc->desc4.constant_urb_entry_read_offset = 0;
1974     desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
1975
1976     dri_bo_emit_reloc(bo,
1977                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1978                       0,
1979                       offsetof(struct gen6_interface_descriptor_data, desc0),
1980                       pp_context->pp_modules[pp_index].kernel.bo);
1981
1982     dri_bo_emit_reloc(bo,
1983                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1984                       desc->desc2.sampler_count << 2,
1985                       offsetof(struct gen6_interface_descriptor_data, desc2),
1986                       pp_context->sampler_state_table.bo);
1987
1988     dri_bo_unmap(bo);
1989     pp_context->idrt.num_interface_descriptors++;
1990 }
1991
1992 static void
1993 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
1994 {
1995     unsigned char *constant_buffer;
1996
1997     assert(sizeof(pp_static_parameter) == 128);
1998     dri_bo_map(pp_context->curbe.bo, 1);
1999     assert(pp_context->curbe.bo->virtual);
2000     constant_buffer = pp_context->curbe.bo->virtual;
2001     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
2002     dri_bo_unmap(pp_context->curbe.bo);
2003 }
2004
2005 static void
2006 gen6_pp_states_setup(VADriverContextP ctx,
2007                      struct i965_post_processing_context *pp_context)
2008 {
2009     gen6_pp_interface_descriptor_table(pp_context);
2010     gen6_pp_upload_constants(pp_context);
2011 }
2012
2013 static void
2014 gen6_pp_pipeline_select(VADriverContextP ctx,
2015                         struct i965_post_processing_context *pp_context)
2016 {
2017     struct intel_batchbuffer *batch = pp_context->batch;
2018
2019     BEGIN_BATCH(batch, 1);
2020     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
2021     ADVANCE_BATCH(batch);
2022 }
2023
2024 static void
2025 gen6_pp_state_base_address(VADriverContextP ctx,
2026                            struct i965_post_processing_context *pp_context)
2027 {
2028     struct intel_batchbuffer *batch = pp_context->batch;
2029
2030     BEGIN_BATCH(batch, 10);
2031     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2032     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2033     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2034     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2035     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2036     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2037     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2038     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2039     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2040     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2041     ADVANCE_BATCH(batch);
2042 }
2043
2044 static void
2045 gen6_pp_vfe_state(VADriverContextP ctx,
2046                   struct i965_post_processing_context *pp_context)
2047 {
2048     struct intel_batchbuffer *batch = pp_context->batch;
2049
2050     BEGIN_BATCH(batch, 8);
2051     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
2052     OUT_BATCH(batch, 0);
2053     OUT_BATCH(batch,
2054               (pp_context->urb.num_vfe_entries - 1) << 16 |
2055               pp_context->urb.num_vfe_entries << 8);
2056     OUT_BATCH(batch, 0);
2057     OUT_BATCH(batch,
2058               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
2059               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
2060     OUT_BATCH(batch, 0);
2061     OUT_BATCH(batch, 0);
2062     OUT_BATCH(batch, 0);
2063     ADVANCE_BATCH(batch);
2064 }
2065
2066 static void
2067 gen6_pp_curbe_load(VADriverContextP ctx,
2068                    struct i965_post_processing_context *pp_context)
2069 {
2070     struct intel_batchbuffer *batch = pp_context->batch;
2071
2072     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);
2073
2074     BEGIN_BATCH(batch, 4);
2075     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
2076     OUT_BATCH(batch, 0);
2077     OUT_BATCH(batch,
2078               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
2079     OUT_RELOC(batch, 
2080               pp_context->curbe.bo,
2081               I915_GEM_DOMAIN_INSTRUCTION, 0,
2082               0);
2083     ADVANCE_BATCH(batch);
2084 }
2085
2086 static void
2087 gen6_interface_descriptor_load(VADriverContextP ctx,
2088                                struct i965_post_processing_context *pp_context)
2089 {
2090     struct intel_batchbuffer *batch = pp_context->batch;
2091
2092     BEGIN_BATCH(batch, 4);
2093     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
2094     OUT_BATCH(batch, 0);
2095     OUT_BATCH(batch,
2096               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
2097     OUT_RELOC(batch, 
2098               pp_context->idrt.bo,
2099               I915_GEM_DOMAIN_INSTRUCTION, 0,
2100               0);
2101     ADVANCE_BATCH(batch);
2102 }
2103
2104 static void
2105 gen6_pp_object_walker(VADriverContextP ctx,
2106                       struct i965_post_processing_context *pp_context)
2107 {
2108     struct intel_batchbuffer *batch = pp_context->batch;
2109     int x, x_steps, y, y_steps;
2110
2111     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
2112     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
2113
2114     for (y = 0; y < y_steps; y++) {
2115         for (x = 0; x < x_steps; x++) {
2116             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
2117                 BEGIN_BATCH(batch, 22);
2118                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20);
2119                 OUT_BATCH(batch, 0);
2120                 OUT_BATCH(batch, 0); /* no indirect data */
2121                 OUT_BATCH(batch, 0);
2122                 OUT_BATCH(batch, 0); /* scoreboard */
2123                 OUT_BATCH(batch, 0);
2124
2125                 /* inline data grf 5-6 */
2126                 assert(sizeof(pp_inline_parameter) == 64);
2127                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
2128
2129                 ADVANCE_BATCH(batch);
2130             }
2131         }
2132     }
2133 }
2134
2135 static void
2136 gen6_pp_pipeline_setup(VADriverContextP ctx,
2137                        struct i965_post_processing_context *pp_context)
2138 {
2139     struct intel_batchbuffer *batch = pp_context->batch;
2140
2141     intel_batchbuffer_start_atomic(batch, 0x1000);
2142     intel_batchbuffer_emit_mi_flush(batch);
2143     gen6_pp_pipeline_select(ctx, pp_context);
2144     gen6_pp_state_base_address(ctx, pp_context);
2145     gen6_pp_curbe_load(ctx, pp_context);
2146     gen6_interface_descriptor_load(ctx, pp_context);
2147     gen6_pp_vfe_state(ctx, pp_context);
2148     gen6_pp_object_walker(ctx, pp_context);
2149     intel_batchbuffer_end_atomic(batch);
2150 }
2151
2152 static void
2153 gen6_post_processing(
2154     VADriverContextP   ctx,
2155     struct i965_post_processing_context *pp_context,
2156     const struct i965_surface *src_surface,
2157     const VARectangle *src_rect,
2158     const struct i965_surface *dst_surface,
2159     const VARectangle *dst_rect,
2160     int                pp_index,
2161     void * filter_param
2162 )
2163 {
2164     gen6_pp_initialize(ctx, pp_context,
2165                        src_surface,
2166                        src_rect,
2167                        dst_surface,
2168                        dst_rect,
2169                        pp_index,
2170                        filter_param);
2171     gen6_pp_states_setup(ctx, pp_context);
2172     gen6_pp_pipeline_setup(ctx, pp_context);
2173 }
2174
2175 static void
2176 i965_post_processing_internal(
2177     VADriverContextP   ctx,
2178     struct i965_post_processing_context *pp_context,
2179     const struct i965_surface *src_surface,
2180     const VARectangle *src_rect,
2181     const struct i965_surface *dst_surface,
2182     const VARectangle *dst_rect,
2183     int                pp_index,
2184     void *filter_param
2185 )
2186 {
2187     struct i965_driver_data *i965 = i965_driver_data(ctx);
2188
2189     if (IS_GEN6(i965->intel.device_id) ||
2190         IS_GEN7(i965->intel.device_id))
2191         gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
2192     else
2193         ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
2194 }
2195
2196 VAStatus 
2197 i965_DestroySurfaces(VADriverContextP ctx,
2198                      VASurfaceID *surface_list,
2199                      int num_surfaces);
2200 VAStatus 
2201 i965_CreateSurfaces(VADriverContextP ctx,
2202                     int width,
2203                     int height,
2204                     int format,
2205                     int num_surfaces,
2206                     VASurfaceID *surfaces);
2207 VASurfaceID
2208 i965_post_processing(
2209     VADriverContextP   ctx,
2210     VASurfaceID        surface,
2211     const VARectangle *src_rect,
2212     const VARectangle *dst_rect,
2213     unsigned int       flags,
2214     int               *has_done_scaling  
2215 )
2216 {
2217     struct i965_driver_data *i965 = i965_driver_data(ctx);
2218     VASurfaceID in_surface_id = surface;
2219     VASurfaceID out_surface_id = VA_INVALID_ID;
2220     
2221     *has_done_scaling = 0;
2222
2223     if (HAS_PP(i965)) {
2224         struct object_surface *obj_surface;
2225         VAStatus status;
2226         struct i965_surface src_surface;
2227         struct i965_surface dst_surface;
2228
2229         obj_surface = SURFACE(in_surface_id);
2230
2231         /* Currently only support post processing for NV12 surface */
2232         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
2233             return out_surface_id;
2234
2235         if (flags & I965_PP_FLAG_DEINTERLACING) {
2236             status = i965_CreateSurfaces(ctx,
2237                                          obj_surface->orig_width,
2238                                          obj_surface->orig_height,
2239                                          VA_RT_FORMAT_YUV420,
2240                                          1,
2241                                          &out_surface_id);
2242             assert(status == VA_STATUS_SUCCESS);
2243             obj_surface = SURFACE(out_surface_id);
2244             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2245
2246             src_surface.id = in_surface_id;
2247             src_surface.flag = I965_SURFACE_SURFACE;
2248             dst_surface.id = out_surface_id;
2249             dst_surface.flag = I965_SURFACE_SURFACE;
2250
2251             i965_post_processing_internal(ctx, i965->pp_context,
2252                                           &src_surface,
2253                                           src_rect,
2254                                           &dst_surface,
2255                                           dst_rect,
2256                                           PP_NV12_DNDI,
2257                                           NULL);
2258         }
2259
2260         if (flags & I965_PP_FLAG_AVS) {
2261             struct i965_render_state *render_state = &i965->render_state;
2262             struct intel_region *dest_region = render_state->draw_region;
2263
2264             if (out_surface_id != VA_INVALID_ID)
2265                 in_surface_id = out_surface_id;
2266
2267             status = i965_CreateSurfaces(ctx,
2268                                          dest_region->width,
2269                                          dest_region->height,
2270                                          VA_RT_FORMAT_YUV420,
2271                                          1,
2272                                          &out_surface_id);
2273             assert(status == VA_STATUS_SUCCESS);
2274             obj_surface = SURFACE(out_surface_id);
2275             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2276
2277             src_surface.id = in_surface_id;
2278             src_surface.flag = I965_SURFACE_SURFACE;
2279             dst_surface.id = out_surface_id;
2280             dst_surface.flag = I965_SURFACE_SURFACE;
2281
2282             i965_post_processing_internal(ctx, i965->pp_context,
2283                                           &src_surface,
2284                                           src_rect,
2285                                           &dst_surface,
2286                                           dst_rect,
2287                                           PP_NV12_AVS,
2288                                           NULL);
2289
2290             if (in_surface_id != surface)
2291                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
2292                 
2293             *has_done_scaling = 1;
2294         }
2295     }
2296
2297     return out_surface_id;
2298 }       
2299
2300 static VAStatus
2301 i965_image_i420_processing(VADriverContextP ctx,
2302                            const struct i965_surface *src_surface,
2303                            const VARectangle *src_rect,
2304                            const struct i965_surface *dst_surface,
2305                            const VARectangle *dst_rect)
2306 {
2307     struct i965_driver_data *i965 = i965_driver_data(ctx);
2308     struct i965_post_processing_context *pp_context = i965->pp_context;
2309     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
2310
2311     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
2312         i965_post_processing_internal(ctx, i965->pp_context,
2313                                       src_surface,
2314                                       src_rect,
2315                                       dst_surface,
2316                                       dst_rect,
2317                                       PP_PL3_LOAD_SAVE_N12,
2318                                       NULL);
2319     } else {
2320         i965_post_processing_internal(ctx, i965->pp_context,
2321                                       src_surface,
2322                                       src_rect,
2323                                       dst_surface,
2324                                       dst_rect,
2325                                       PP_PL3_LOAD_SAVE_PL3,
2326                                       NULL);
2327     }
2328
2329     intel_batchbuffer_flush(pp_context->batch);
2330
2331     return VA_STATUS_SUCCESS;
2332 }
2333
2334 static VAStatus
2335 i965_image_nv12_processing(VADriverContextP ctx,
2336                            const struct i965_surface *src_surface,
2337                            const VARectangle *src_rect,
2338                            const struct i965_surface *dst_surface,
2339                            const VARectangle *dst_rect)
2340 {
2341     struct i965_driver_data *i965 = i965_driver_data(ctx);
2342     struct i965_post_processing_context *pp_context = i965->pp_context;
2343     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
2344
2345     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
2346         i965_post_processing_internal(ctx, i965->pp_context,
2347                                       src_surface,
2348                                       src_rect,
2349                                       dst_surface,
2350                                       dst_rect,
2351                                       PP_NV12_LOAD_SAVE_N12,
2352                                       NULL);
2353     } else {
2354         i965_post_processing_internal(ctx, i965->pp_context,
2355                                       src_surface,
2356                                       src_rect,
2357                                       dst_surface,
2358                                       dst_rect,
2359                                       PP_NV12_LOAD_SAVE_PL3,
2360                                       NULL);
2361     }
2362
2363     intel_batchbuffer_flush(pp_context->batch);
2364
2365     return VA_STATUS_SUCCESS;
2366 }
2367
2368 VAStatus
2369 i965_image_processing(VADriverContextP ctx,
2370                       const struct i965_surface *src_surface,
2371                       const VARectangle *src_rect,
2372                       const struct i965_surface *dst_surface,
2373                       const VARectangle *dst_rect)
2374 {
2375     struct i965_driver_data *i965 = i965_driver_data(ctx);
2376     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
2377
2378     if (HAS_PP(i965)) {
2379         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
2380
2381         switch (fourcc) {
2382         case VA_FOURCC('Y', 'V', '1', '2'):
2383         case VA_FOURCC('I', '4', '2', '0'):
2384             status = i965_image_i420_processing(ctx,
2385                                                 src_surface,
2386                                                 src_rect,
2387                                                 dst_surface,
2388                                                 dst_rect);
2389             break;
2390
2391         case  VA_FOURCC('N', 'V', '1', '2'):
2392             status = i965_image_nv12_processing(ctx,
2393                                                 src_surface,
2394                                                 src_rect,
2395                                                 dst_surface,
2396                                                 dst_rect);
2397             break;
2398
2399         default:
2400             status = VA_STATUS_ERROR_UNIMPLEMENTED;
2401             break;
2402         }
2403     }
2404
2405     return status;
2406 }       
2407
2408 static void
2409 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
2410 {
2411     int i;
2412
2413     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
2414     pp_context->surface_state_binding_table.bo = NULL;
2415
2416     dri_bo_unreference(pp_context->curbe.bo);
2417     pp_context->curbe.bo = NULL;
2418
2419     dri_bo_unreference(pp_context->sampler_state_table.bo);
2420     pp_context->sampler_state_table.bo = NULL;
2421
2422     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2423     pp_context->sampler_state_table.bo_8x8 = NULL;
2424
2425     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2426     pp_context->sampler_state_table.bo_8x8_uv = NULL;
2427
2428     dri_bo_unreference(pp_context->idrt.bo);
2429     pp_context->idrt.bo = NULL;
2430     pp_context->idrt.num_interface_descriptors = 0;
2431
2432     dri_bo_unreference(pp_context->vfe_state.bo);
2433     pp_context->vfe_state.bo = NULL;
2434
2435     dri_bo_unreference(pp_context->stmm.bo);
2436     pp_context->stmm.bo = NULL;
2437
2438     for (i = 0; i < NUM_PP_MODULES; i++) {
2439         struct pp_module *pp_module = &pp_context->pp_modules[i];
2440
2441         dri_bo_unreference(pp_module->kernel.bo);
2442         pp_module->kernel.bo = NULL;
2443     }
2444
2445 }
2446
2447 Bool
2448 i965_post_processing_terminate(VADriverContextP ctx)
2449 {
2450     struct i965_driver_data *i965 = i965_driver_data(ctx);
2451     struct i965_post_processing_context *pp_context = i965->pp_context;
2452
2453     if (pp_context) {
2454         i965_post_processing_context_finalize(pp_context);
2455         free(pp_context);
2456     }
2457
2458     i965->pp_context = NULL;
2459
2460     return True;
2461 }
2462
2463 static void
2464 i965_post_processing_context_init(VADriverContextP ctx,
2465                                   struct i965_post_processing_context *pp_context,
2466                                   struct intel_batchbuffer *batch)
2467 {
2468     struct i965_driver_data *i965 = i965_driver_data(ctx);
2469     int i;
2470
2471     pp_context->urb.size = URB_SIZE((&i965->intel));
2472     pp_context->urb.num_vfe_entries = 32;
2473     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
2474     pp_context->urb.num_cs_entries = 1;
2475     pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
2476     pp_context->urb.vfe_start = 0;
2477     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2478         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2479     assert(pp_context->urb.cs_start + 
2480            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2481
2482     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
2483     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2484
2485     if (IS_GEN6(i965->intel.device_id) ||
2486         IS_GEN7(i965->intel.device_id))
2487         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
2488     else if (IS_IRONLAKE(i965->intel.device_id))
2489         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
2490
2491     for (i = 0; i < NUM_PP_MODULES; i++) {
2492         struct pp_module *pp_module = &pp_context->pp_modules[i];
2493         dri_bo_unreference(pp_module->kernel.bo);
2494         if (pp_module->kernel.bin) {
2495             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
2496                                                 pp_module->kernel.name,
2497                                                 pp_module->kernel.size,
2498                                                 4096);
2499             assert(pp_module->kernel.bo);
2500             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
2501         } else {
2502             pp_module->kernel.bo = NULL;
2503         }
2504     }
2505
2506     pp_context->batch = batch;
2507 }
2508
2509 Bool
2510 i965_post_processing_init(VADriverContextP ctx)
2511 {
2512     struct i965_driver_data *i965 = i965_driver_data(ctx);
2513     struct i965_post_processing_context *pp_context = i965->pp_context;
2514
2515     if (HAS_PP(i965)) {
2516         if (pp_context == NULL) {
2517             pp_context = calloc(1, sizeof(*pp_context));
2518             i965_post_processing_context_init(ctx, pp_context, i965->batch);
2519             i965->pp_context = pp_context;
2520         }
2521     }
2522
2523     return True;
2524 }
2525
2526 static const int procfilter_to_pp_flag[10] = {
2527     PP_NULL,    /* VAProcFilterNone */
2528     PP_NULL,    /* VAProcFilterDering */
2529     PP_NULL,    /* VAProcFilterDeblocking */
2530     PP_NV12_DN, /* VAProcFilterNoiseReduction */
2531     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
2532     PP_NULL,    /* VAProcFilterSharpening */
2533     PP_NULL,    /* VAProcFilterColorEnhancement */
2534     PP_NULL,    /* VAProcFilterProcAmp */
2535     PP_NULL,    /* VAProcFilterComposition */
2536     PP_NULL,    /* VAProcFilterFrameRateConversion */
2537 };
2538
2539 static void 
2540 i965_proc_picture(VADriverContextP ctx, 
2541                   VAProfile profile, 
2542                   union codec_state *codec_state,
2543                   struct hw_context *hw_context)
2544 {
2545     struct i965_driver_data *i965 = i965_driver_data(ctx);
2546     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
2547     struct proc_state *proc_state = &codec_state->proc;
2548     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
2549     VAProcInputParameterBuffer *input_param = (VAProcInputParameterBuffer *)proc_state->input_param->buffer;
2550     struct object_surface *obj_surface;
2551     struct i965_surface src_surface, dst_surface;
2552     VAStatus status;
2553     int i;
2554     VASurfaceID tmp_surfaces[VA_PROC_PIPELINE_MAX_NUM_FILTERS];
2555     int num_tmp_surfaces = 0;
2556
2557     assert(input_param->surface != VA_INVALID_ID);
2558     assert(proc_state->current_render_target != VA_INVALID_ID);
2559
2560     obj_surface = SURFACE(proc_state->current_render_target);
2561     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2562
2563     obj_surface = SURFACE(input_param->surface);
2564     assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
2565
2566     src_surface.id = input_param->surface;
2567     src_surface.flag = I965_SURFACE_SURFACE;
2568     
2569     for (i = 0; i < VA_PROC_PIPELINE_MAX_NUM_FILTERS; i++) {
2570         VAProcFilterType filter_type = pipeline_param->filter_pipeline[i];
2571         VASurfaceID out_surface_id = VA_INVALID_ID;
2572         void *filter_param = NULL;
2573
2574         if (procfilter_to_pp_flag[filter_type] != PP_NULL) {
2575             if (proc_state->filter_param[filter_type])
2576                 filter_param = proc_state->filter_param[filter_type]->buffer;
2577
2578             status = i965_CreateSurfaces(ctx,
2579                                          obj_surface->orig_width,
2580                                          obj_surface->orig_height,
2581                                          VA_RT_FORMAT_YUV420,
2582                                          1,
2583                                          &out_surface_id);
2584             assert(status == VA_STATUS_SUCCESS);
2585             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
2586             obj_surface = SURFACE(out_surface_id);
2587             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2588             dst_surface.id = out_surface_id;
2589             dst_surface.flag = I965_SURFACE_SURFACE;
2590             i965_post_processing_internal(ctx, &proc_context->pp_context,
2591                                           &src_surface,
2592                                           &input_param->region,
2593                                           &dst_surface,
2594                                           &input_param->region,
2595                                           procfilter_to_pp_flag[filter_type],
2596                                           filter_param);
2597             src_surface.id = dst_surface.id;
2598         }
2599     }
2600
2601     dst_surface.id = proc_state->current_render_target;
2602     dst_surface.flag = I965_SURFACE_SURFACE;
2603     i965_post_processing_internal(ctx, &proc_context->pp_context,
2604                                   &src_surface,
2605                                   &input_param->region,
2606                                   &dst_surface,
2607                                   &pipeline_param->output_region,
2608                                   PP_NV12_AVS,
2609                                   NULL);
2610
2611     if (num_tmp_surfaces)
2612         i965_DestroySurfaces(ctx,
2613                              tmp_surfaces,
2614                              num_tmp_surfaces);
2615
2616     intel_batchbuffer_flush(hw_context->batch);
2617 }
2618
2619 static void
2620 i965_proc_context_destroy(void *hw_context)
2621 {
2622     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
2623
2624     i965_post_processing_context_finalize(&proc_context->pp_context);
2625     intel_batchbuffer_free(proc_context->base.batch);
2626     free(proc_context);
2627 }
2628
2629 struct hw_context *
2630 i965_proc_context_init(VADriverContextP ctx, VAProfile profile)
2631 {
2632     struct intel_driver_data *intel = intel_driver_data(ctx);
2633     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
2634
2635     proc_context->base.destroy = i965_proc_context_destroy;
2636     proc_context->base.run = i965_proc_picture;
2637     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
2638     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
2639
2640     return (struct hw_context *)proc_context;
2641 }