Support parameter for VAProcFilterNoiseReduction
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
40 #include "i965_drv_video.h"
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
43
/*
 * Non-zero when the device id stored in ctx matches one of the three
 * generation checks below (Ironlake, Gen6, Gen7).
 */
#define HAS_PP(ctx)                                 \
    (IS_IRONLAKE((ctx)->intel.device_id) ||         \
     IS_GEN6((ctx)->intel.device_id) ||             \
     IS_GEN7((ctx)->intel.device_id))
47
/*
 * Layout of the combined surface-state / binding-table buffer.
 *
 * Every surface-state slot has the same size: the larger of the two
 * surface-state structures, each rounded up to a multiple of 32 bytes.
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_I965
/*
 * Byte offset of slot `index`.  The argument is parenthesized so that an
 * expression such as `base + 1` is scaled as a whole rather than only its
 * first operand.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* The binding table starts right after MAX_PP_SURFACES surface-state slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
55
56 static const uint32_t pp_null_gen5[][4] = {
57 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
58 };
59
60 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
61 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
62 };
63
64 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
65 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
66 };
67
68 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
69 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
70 };
71
72 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
73 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
74 };
75
76 static const uint32_t pp_nv12_scaling_gen5[][4] = {
77 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
78 };
79
80 static const uint32_t pp_nv12_avs_gen5[][4] = {
81 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
82 };
83
84 static const uint32_t pp_nv12_dndi_gen5[][4] = {
85 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
86 };
87
88 static void pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
89                                const struct i965_surface *src_surface,
90                                const VARectangle *src_rect,
91                                const struct i965_surface *dst_surface,
92                                const VARectangle *dst_rect,
93                                void *filter_param);
94 static void pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
95                                    const struct i965_surface *src_surface,
96                                    const VARectangle *src_rect,
97                                    const struct i965_surface *dst_surface,
98                                    const VARectangle *dst_rect,
99                                    void *filter_param);
100 static void pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
101                                        const struct i965_surface *src_surface,
102                                        const VARectangle *src_rect,
103                                        const struct i965_surface *dst_surface,
104                                        const VARectangle *dst_rect,
105                                        void *filter_param);
106 static void pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
107                                             const struct i965_surface *src_surface,
108                                             const VARectangle *src_rect,
109                                             const struct i965_surface *dst_surface,
110                                             const VARectangle *dst_rect,
111                                             void *filter_param);
112 static void pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
113                                     const struct i965_surface *src_surface,
114                                     const VARectangle *src_rect,
115                                     const struct i965_surface *dst_surface,
116                                     const VARectangle *dst_rect,
117                                     void *filter_param);
118
119 static struct pp_module pp_modules_gen5[] = {
120     {
121         {
122             "NULL module (for testing)",
123             PP_NULL,
124             pp_null_gen5,
125             sizeof(pp_null_gen5),
126             NULL,
127         },
128
129         pp_null_initialize,
130     },
131
132     {
133         {
134             "NV12_NV12",
135             PP_NV12_LOAD_SAVE_N12,
136             pp_nv12_load_save_nv12_gen5,
137             sizeof(pp_nv12_load_save_nv12_gen5),
138             NULL,
139         },
140
141         pp_plx_load_save_plx_initialize,
142     },
143
144     {
145         {
146             "NV12_PL3",
147             PP_NV12_LOAD_SAVE_PL3,
148             pp_nv12_load_save_pl3_gen5,
149             sizeof(pp_nv12_load_save_pl3_gen5),
150             NULL,
151         },
152
153         pp_plx_load_save_plx_initialize,
154     },
155
156     {
157         {
158             "PL3_NV12",
159             PP_PL3_LOAD_SAVE_N12,
160             pp_pl3_load_save_nv12_gen5,
161             sizeof(pp_pl3_load_save_nv12_gen5),
162             NULL,
163         },
164
165         pp_plx_load_save_plx_initialize,
166     },
167
168     {
169         {
170             "PL3_PL3",
171             PP_PL3_LOAD_SAVE_N12,
172             pp_pl3_load_save_pl3_gen5,
173             sizeof(pp_pl3_load_save_pl3_gen5),
174             NULL,
175         },
176
177         pp_plx_load_save_plx_initialize
178     },
179
180     {
181         {
182             "NV12 Scaling module",
183             PP_NV12_SCALING,
184             pp_nv12_scaling_gen5,
185             sizeof(pp_nv12_scaling_gen5),
186             NULL,
187         },
188
189         pp_nv12_scaling_initialize,
190     },
191
192     {
193         {
194             "NV12 AVS module",
195             PP_NV12_AVS,
196             pp_nv12_avs_gen5,
197             sizeof(pp_nv12_avs_gen5),
198             NULL,
199         },
200
201         pp_nv12_avs_initialize,
202     },
203
204     {
205         {
206             "NV12 DNDI module",
207             PP_NV12_DNDI,
208             pp_nv12_dndi_gen5,
209             sizeof(pp_nv12_dndi_gen5),
210             NULL,
211         },
212
213         pp_nv12_dndi_initialize,
214     },
215 };
216
217 static const uint32_t pp_null_gen6[][4] = {
218 #include "shaders/post_processing/gen5_6/null.g6b"
219 };
220
221 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
222 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
223 };
224
225 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
226 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
227 };
228
229 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
230 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
231 };
232
233 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
234 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
235 };
236
237 static const uint32_t pp_nv12_scaling_gen6[][4] = {
238 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g6b"
239 };
240
241 static const uint32_t pp_nv12_avs_gen6[][4] = {
242 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
243 };
244
245 static const uint32_t pp_nv12_dndi_gen6[][4] = {
246 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
247 };
248
249 static struct pp_module pp_modules_gen6[] = {
250     {
251         {
252             "NULL module (for testing)",
253             PP_NULL,
254             pp_null_gen6,
255             sizeof(pp_null_gen6),
256             NULL,
257         },
258
259         pp_null_initialize,
260     },
261
262     {
263         {
264             "NV12_NV12",
265             PP_NV12_LOAD_SAVE_N12,
266             pp_nv12_load_save_nv12_gen6,
267             sizeof(pp_nv12_load_save_nv12_gen6),
268             NULL,
269         },
270
271         pp_plx_load_save_plx_initialize,
272     },
273
274     {
275         {
276             "NV12_PL3",
277             PP_NV12_LOAD_SAVE_PL3,
278             pp_nv12_load_save_pl3_gen6,
279             sizeof(pp_nv12_load_save_pl3_gen6),
280             NULL,
281         },
282         
283         pp_plx_load_save_plx_initialize,
284     },
285
286     {
287         {
288             "PL3_NV12",
289             PP_PL3_LOAD_SAVE_N12,
290             pp_pl3_load_save_nv12_gen6,
291             sizeof(pp_pl3_load_save_nv12_gen6),
292             NULL,
293         },
294
295         pp_plx_load_save_plx_initialize,
296     },
297
298     {
299         {
300             "PL3_PL3",
301             PP_PL3_LOAD_SAVE_N12,
302             pp_pl3_load_save_pl3_gen6,
303             sizeof(pp_pl3_load_save_pl3_gen6),
304             NULL,
305         },
306
307         pp_plx_load_save_plx_initialize,
308     },
309
310     {
311         {
312             "NV12 Scaling module",
313             PP_NV12_SCALING,
314             pp_nv12_scaling_gen6,
315             sizeof(pp_nv12_scaling_gen6),
316             NULL,
317         },
318
319         pp_nv12_scaling_initialize,
320     },
321
322     {
323         {
324             "NV12 AVS module",
325             PP_NV12_AVS,
326             pp_nv12_avs_gen6,
327             sizeof(pp_nv12_avs_gen6),
328             NULL,
329         },
330
331         pp_nv12_avs_initialize,
332     },
333
334     {
335         {
336             "NV12 DNDI module",
337             PP_NV12_DNDI,
338             pp_nv12_dndi_gen6,
339             sizeof(pp_nv12_dndi_gen6),
340             NULL,
341         },
342
343         pp_nv12_dndi_initialize,
344     },
345 };
346
/*
 * Shorthand for the two parameter blocks kept in the post-processing
 * context.  Both expand to a member of a variable literally named
 * `pp_context`, so they can only be used where such a variable is in scope.
 */
#define pp_static_parameter pp_context->pp_static_parameter
#define pp_inline_parameter pp_context->pp_inline_parameter
349
350 static int
351 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
352 {
353     struct i965_driver_data *i965 = i965_driver_data(ctx);
354     int fourcc;
355
356     if (surface->flag == I965_SURFACE_IMAGE) {
357         struct object_image *obj_image = IMAGE(surface->id);
358         fourcc = obj_image->image.format.fourcc;
359     } else {
360         struct object_surface *obj_surface = SURFACE(surface->id);
361         fourcc = obj_surface->fourcc;
362     }
363
364     return fourcc;
365 }
366
367 static void
368 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
369 {
370     switch (tiling) {
371     case I915_TILING_NONE:
372         ss->ss3.tiled_surface = 0;
373         ss->ss3.tile_walk = 0;
374         break;
375     case I915_TILING_X:
376         ss->ss3.tiled_surface = 1;
377         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
378         break;
379     case I915_TILING_Y:
380         ss->ss3.tiled_surface = 1;
381         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
382         break;
383     }
384 }
385
386 static void
387 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
388 {
389     switch (tiling) {
390     case I915_TILING_NONE:
391         ss->ss2.tiled_surface = 0;
392         ss->ss2.tile_walk = 0;
393         break;
394     case I915_TILING_X:
395         ss->ss2.tiled_surface = 1;
396         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
397         break;
398     case I915_TILING_Y:
399         ss->ss2.tiled_surface = 1;
400         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
401         break;
402     }
403 }
404
405 static void
406 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
407 {
408     struct i965_interface_descriptor *desc;
409     dri_bo *bo;
410     int pp_index = pp_context->current_pp;
411
412     bo = pp_context->idrt.bo;
413     dri_bo_map(bo, 1);
414     assert(bo->virtual);
415     desc = bo->virtual;
416     memset(desc, 0, sizeof(*desc));
417     desc->desc0.grf_reg_blocks = 10;
418     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
419     desc->desc1.const_urb_entry_read_offset = 0;
420     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
421     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
422     desc->desc2.sampler_count = 0;
423     desc->desc3.binding_table_entry_count = 0;
424     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
425
426     dri_bo_emit_reloc(bo,
427                       I915_GEM_DOMAIN_INSTRUCTION, 0,
428                       desc->desc0.grf_reg_blocks,
429                       offsetof(struct i965_interface_descriptor, desc0),
430                       pp_context->pp_modules[pp_index].kernel.bo);
431
432     dri_bo_emit_reloc(bo,
433                       I915_GEM_DOMAIN_INSTRUCTION, 0,
434                       desc->desc2.sampler_count << 2,
435                       offsetof(struct i965_interface_descriptor, desc2),
436                       pp_context->sampler_state_table.bo);
437
438     dri_bo_unmap(bo);
439     pp_context->idrt.num_interface_descriptors++;
440 }
441
442 static void
443 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
444 {
445     struct i965_vfe_state *vfe_state;
446     dri_bo *bo;
447
448     bo = pp_context->vfe_state.bo;
449     dri_bo_map(bo, 1);
450     assert(bo->virtual);
451     vfe_state = bo->virtual;
452     memset(vfe_state, 0, sizeof(*vfe_state));
453     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
454     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
455     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
456     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
457     vfe_state->vfe1.children_present = 0;
458     vfe_state->vfe2.interface_descriptor_base = 
459         pp_context->idrt.bo->offset >> 4; /* reloc */
460     dri_bo_emit_reloc(bo,
461                       I915_GEM_DOMAIN_INSTRUCTION, 0,
462                       0,
463                       offsetof(struct i965_vfe_state, vfe2),
464                       pp_context->idrt.bo);
465     dri_bo_unmap(bo);
466 }
467
468 static void
469 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
470 {
471     unsigned char *constant_buffer;
472
473     assert(sizeof(pp_static_parameter) == 128);
474     dri_bo_map(pp_context->curbe.bo, 1);
475     assert(pp_context->curbe.bo->virtual);
476     constant_buffer = pp_context->curbe.bo->virtual;
477     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
478     dri_bo_unmap(pp_context->curbe.bo);
479 }
480
481 static void
482 ironlake_pp_states_setup(VADriverContextP ctx,
483                          struct i965_post_processing_context *pp_context)
484 {
485     ironlake_pp_interface_descriptor_table(pp_context);
486     ironlake_pp_vfe_state(pp_context);
487     ironlake_pp_upload_constants(pp_context);
488 }
489
490 static void
491 ironlake_pp_pipeline_select(VADriverContextP ctx,
492                             struct i965_post_processing_context *pp_context)
493 {
494     struct intel_batchbuffer *batch = pp_context->batch;
495
496     BEGIN_BATCH(batch, 1);
497     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
498     ADVANCE_BATCH(batch);
499 }
500
501 static void
502 ironlake_pp_urb_layout(VADriverContextP ctx,
503                        struct i965_post_processing_context *pp_context)
504 {
505     struct intel_batchbuffer *batch = pp_context->batch;
506     unsigned int vfe_fence, cs_fence;
507
508     vfe_fence = pp_context->urb.cs_start;
509     cs_fence = pp_context->urb.size;
510
511     BEGIN_BATCH(batch, 3);
512     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
513     OUT_BATCH(batch, 0);
514     OUT_BATCH(batch, 
515               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
516               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
517     ADVANCE_BATCH(batch);
518 }
519
520 static void
521 ironlake_pp_state_base_address(VADriverContextP ctx,
522                                struct i965_post_processing_context *pp_context)
523 {
524     struct intel_batchbuffer *batch = pp_context->batch;
525
526     BEGIN_BATCH(batch, 8);
527     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
528     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
529     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
530     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
531     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
532     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
533     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
534     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
535     ADVANCE_BATCH(batch);
536 }
537
538 static void
539 ironlake_pp_state_pointers(VADriverContextP ctx,
540                            struct i965_post_processing_context *pp_context)
541 {
542     struct intel_batchbuffer *batch = pp_context->batch;
543
544     BEGIN_BATCH(batch, 3);
545     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
546     OUT_BATCH(batch, 0);
547     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
548     ADVANCE_BATCH(batch);
549 }
550
551 static void 
552 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
553                           struct i965_post_processing_context *pp_context)
554 {
555     struct intel_batchbuffer *batch = pp_context->batch;
556
557     BEGIN_BATCH(batch, 2);
558     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
559     OUT_BATCH(batch,
560               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
561               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
562     ADVANCE_BATCH(batch);
563 }
564
565 static void
566 ironlake_pp_constant_buffer(VADriverContextP ctx,
567                             struct i965_post_processing_context *pp_context)
568 {
569     struct intel_batchbuffer *batch = pp_context->batch;
570
571     BEGIN_BATCH(batch, 2);
572     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
573     OUT_RELOC(batch, pp_context->curbe.bo,
574               I915_GEM_DOMAIN_INSTRUCTION, 0,
575               pp_context->urb.size_cs_entry - 1);
576     ADVANCE_BATCH(batch);    
577 }
578
579 static void
580 ironlake_pp_object_walker(VADriverContextP ctx,
581                           struct i965_post_processing_context *pp_context)
582 {
583     struct intel_batchbuffer *batch = pp_context->batch;
584     int x, x_steps, y, y_steps;
585
586     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
587     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
588
589     for (y = 0; y < y_steps; y++) {
590         for (x = 0; x < x_steps; x++) {
591             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
592                 BEGIN_BATCH(batch, 20);
593                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
594                 OUT_BATCH(batch, 0);
595                 OUT_BATCH(batch, 0); /* no indirect data */
596                 OUT_BATCH(batch, 0);
597
598                 /* inline data grf 5-6 */
599                 assert(sizeof(pp_inline_parameter) == 64);
600                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
601
602                 ADVANCE_BATCH(batch);
603             }
604         }
605     }
606 }
607
608 static void
609 ironlake_pp_pipeline_setup(VADriverContextP ctx,
610                            struct i965_post_processing_context *pp_context)
611 {
612     struct intel_batchbuffer *batch = pp_context->batch;
613
614     intel_batchbuffer_start_atomic(batch, 0x1000);
615     intel_batchbuffer_emit_mi_flush(batch);
616     ironlake_pp_pipeline_select(ctx, pp_context);
617     ironlake_pp_state_base_address(ctx, pp_context);
618     ironlake_pp_state_pointers(ctx, pp_context);
619     ironlake_pp_urb_layout(ctx, pp_context);
620     ironlake_pp_cs_urb_layout(ctx, pp_context);
621     ironlake_pp_constant_buffer(ctx, pp_context);
622     ironlake_pp_object_walker(ctx, pp_context);
623     intel_batchbuffer_end_atomic(batch);
624 }
625
626 static void
627 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
628                           dri_bo *surf_bo, unsigned long surf_bo_offset,
629                           int width, int height, int pitch, int format, 
630                           int index, int is_target)
631 {
632     struct i965_surface_state *ss;
633     dri_bo *ss_bo;
634     unsigned int tiling;
635     unsigned int swizzle;
636
637     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
638     ss_bo = pp_context->surface_state_binding_table.bo;
639     assert(ss_bo);
640
641     dri_bo_map(ss_bo, True);
642     assert(ss_bo->virtual);
643     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
644     memset(ss, 0, sizeof(*ss));
645     ss->ss0.surface_type = I965_SURFACE_2D;
646     ss->ss0.surface_format = format;
647     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
648     ss->ss2.width = width - 1;
649     ss->ss2.height = height - 1;
650     ss->ss3.pitch = pitch - 1;
651     pp_set_surface_tiling(ss, tiling);
652     dri_bo_emit_reloc(ss_bo,
653                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
654                       surf_bo_offset,
655                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
656                       surf_bo);
657     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
658     dri_bo_unmap(ss_bo);
659 }
660
661 static void
662 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
663                            dri_bo *surf_bo, unsigned long surf_bo_offset,
664                            int width, int height, int wpitch,
665                            int xoffset, int yoffset,
666                            int format, int interleave_chroma,
667                            int index)
668 {
669     struct i965_surface_state2 *ss2;
670     dri_bo *ss2_bo;
671     unsigned int tiling;
672     unsigned int swizzle;
673
674     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
675     ss2_bo = pp_context->surface_state_binding_table.bo;
676     assert(ss2_bo);
677
678     dri_bo_map(ss2_bo, True);
679     assert(ss2_bo->virtual);
680     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
681     memset(ss2, 0, sizeof(*ss2));
682     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
683     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
684     ss2->ss1.width = width - 1;
685     ss2->ss1.height = height - 1;
686     ss2->ss2.pitch = wpitch - 1;
687     ss2->ss2.interleave_chroma = interleave_chroma;
688     ss2->ss2.surface_format = format;
689     ss2->ss3.x_offset_for_cb = xoffset;
690     ss2->ss3.y_offset_for_cb = yoffset;
691     pp_set_surface2_tiling(ss2, tiling);
692     dri_bo_emit_reloc(ss2_bo,
693                       I915_GEM_DOMAIN_RENDER, 0,
694                       surf_bo_offset,
695                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
696                       surf_bo);
697     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
698     dri_bo_unmap(ss2_bo);
699 }
700
701 static void 
702 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
703                                 const struct i965_surface *surface, 
704                                 int base_index, int is_target,
705                                 int *width, int *height, int *pitch, int *offset)
706 {
707     struct i965_driver_data *i965 = i965_driver_data(ctx);
708     struct object_surface *obj_surface;
709     struct object_image *obj_image;
710     dri_bo *bo;
711     int fourcc = pp_get_surface_fourcc(ctx, surface);
712     const int Y = 0;
713     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
714     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
715     const int UV = 1;
716     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
717
718     if (surface->flag == I965_SURFACE_SURFACE) {
719         obj_surface = SURFACE(surface->id);
720         bo = obj_surface->bo;
721         width[0] = obj_surface->orig_width;
722         height[0] = obj_surface->orig_height;
723         pitch[0] = obj_surface->width;
724         offset[0] = 0;
725
726         if (interleaved_uv) {
727             width[1] = obj_surface->orig_width;
728             height[1] = obj_surface->orig_height / 2;
729             pitch[1] = obj_surface->width;
730             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
731         } else {
732             width[1] = obj_surface->orig_width / 2;
733             height[1] = obj_surface->orig_height / 2;
734             pitch[1] = obj_surface->width / 2;
735             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
736             width[2] = obj_surface->orig_width / 2;
737             height[2] = obj_surface->orig_height / 2;
738             pitch[2] = obj_surface->width / 2;
739             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
740         }
741     } else {
742         obj_image = IMAGE(surface->id);
743         bo = obj_image->bo;
744         width[0] = obj_image->image.width;
745         height[0] = obj_image->image.height;
746         pitch[0] = obj_image->image.pitches[0];
747         offset[0] = obj_image->image.offsets[0];
748
749         if (interleaved_uv) {
750             width[1] = obj_image->image.width;
751             height[1] = obj_image->image.height / 2;
752             pitch[1] = obj_image->image.pitches[1];
753             offset[1] = obj_image->image.offsets[1];
754         } else {
755             width[1] = obj_image->image.width / 2;
756             height[1] = obj_image->image.height / 2;
757             pitch[1] = obj_image->image.pitches[1];
758             offset[1] = obj_image->image.offsets[1];
759             width[2] = obj_image->image.width / 2;
760             height[2] = obj_image->image.height / 2;
761             pitch[2] = obj_image->image.pitches[2];
762             offset[2] = obj_image->image.offsets[2];
763         }
764     }
765
766     /* Y surface */
767     i965_pp_set_surface_state(ctx, pp_context,
768                               bo, offset[Y],
769                               width[Y] / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
770                               base_index, is_target);
771
772     if (interleaved_uv) {
773         i965_pp_set_surface_state(ctx, pp_context,
774                                   bo, offset[UV],
775                                   width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
776                                   base_index + 1, is_target);
777     } else {
778         /* U surface */
779         i965_pp_set_surface_state(ctx, pp_context,
780                                   bo, offset[U],
781                                   width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
782                                   base_index + 1, is_target);
783
784         /* V surface */
785         i965_pp_set_surface_state(ctx, pp_context,
786                                   bo, offset[V],
787                                   width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
788                                   base_index + 2, is_target);
789     }
790
791 }
792
/* Horizontal step count for the NULL module: always 1; the argument is ignored. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
798
/* Vertical step count for the NULL module: always 1; the argument is ignored. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
804
/*
 * Block-parameter callback for the NULL module: there is nothing to set
 * up, so it ignores its arguments and always returns 0.
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
810
811 static void
812 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
813                    const struct i965_surface *src_surface,
814                    const VARectangle *src_rect,
815                    const struct i965_surface *dst_surface,
816                    const VARectangle *dst_rect,
817                    void *filter_param)
818 {
819     /* private function & data */
820     pp_context->pp_x_steps = pp_null_x_steps;
821     pp_context->pp_y_steps = pp_null_y_steps;
822     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
823 }
824
/* Horizontal step count for the load/save modules: always 1; the argument is ignored. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
830
831 static int
832 pp_load_save_y_steps(void *private_context)
833 {
834     struct pp_load_save_context *pp_load_save_context = private_context;
835
836     return pp_load_save_context->dest_h / 8;
837 }
838
839 static int
840 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
841 {
842     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
843     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
844     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
845     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
846
847     return 0;
848 }
849
850 static void
851 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
852                                 const struct i965_surface *src_surface,
853                                 const VARectangle *src_rect,
854                                 const struct i965_surface *dst_surface,
855                                 const VARectangle *dst_rect,
856                                 void *filter_param)
857 {
858     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
859     int width[3], height[3], pitch[3], offset[3];
860     const int Y = 0;
861
862     /* source surface */
863     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
864                                     width, height, pitch, offset);
865
866     /* destination surface */
867     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
868                                     width, height, pitch, offset);
869
870     /* private function & data */
871     pp_context->pp_x_steps = pp_load_save_x_steps;
872     pp_context->pp_y_steps = pp_load_save_y_steps;
873     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
874     pp_load_save_context->dest_h = ALIGN(height[Y], 16);
875     pp_load_save_context->dest_w = ALIGN(width[Y], 16);
876
877     pp_inline_parameter.grf5.block_count_x = ALIGN(width[Y], 16) / 16;   /* 1 x N */
878     pp_inline_parameter.grf5.number_blocks = ALIGN(width[Y], 16) / 16;
879 }
880
/*
 * pp_x_steps callback installed by pp_nv12_scaling_initialize().
 * Always reports one horizontal step; the private context is unused.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;      /* not needed: the step count is fixed */

    return 1;
}
886
887 static int
888 pp_scaling_y_steps(void *private_context)
889 {
890     struct pp_scaling_context *pp_scaling_context = private_context;
891
892     return pp_scaling_context->dest_h / 8;
893 }
894
895 static int
896 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
897 {
898     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
899     float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
900     float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
901
902     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
903     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
904     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
905     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
906     
907     return 0;
908 }
909
910 static void
911 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
912                            const struct i965_surface *src_surface,
913                            const VARectangle *src_rect,
914                            const struct i965_surface *dst_surface,
915                            const VARectangle *dst_rect,
916                            void *filter_param)
917 {
918     struct i965_driver_data *i965 = i965_driver_data(ctx);
919     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
920     struct object_surface *obj_surface;
921     struct i965_sampler_state *sampler_state;
922     int in_w, in_h, in_wpitch, in_hpitch;
923     int out_w, out_h, out_wpitch, out_hpitch;
924
925     /* source surface */
926     obj_surface = SURFACE(src_surface->id);
927     in_w = obj_surface->orig_width;
928     in_h = obj_surface->orig_height;
929     in_wpitch = obj_surface->width;
930     in_hpitch = obj_surface->height;
931
932     /* source Y surface index 1 */
933     i965_pp_set_surface_state(ctx, pp_context,
934                               obj_surface->bo, 0,
935                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
936                               1, 0);
937
938     /* source UV surface index 2 */
939     i965_pp_set_surface_state(ctx, pp_context,
940                               obj_surface->bo, in_wpitch * in_hpitch,
941                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
942                               2, 0);
943
944     /* destination surface */
945     obj_surface = SURFACE(dst_surface->id);
946     out_w = obj_surface->orig_width;
947     out_h = obj_surface->orig_height;
948     out_wpitch = obj_surface->width;
949     out_hpitch = obj_surface->height;
950
951     /* destination Y surface index 7 */
952     i965_pp_set_surface_state(ctx, pp_context,
953                               obj_surface->bo, 0,
954                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
955                               7, 1);
956
957     /* destination UV surface index 8 */
958     i965_pp_set_surface_state(ctx, pp_context,
959                               obj_surface->bo, out_wpitch * out_hpitch,
960                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
961                               8, 1);
962
963     /* sampler state */
964     dri_bo_map(pp_context->sampler_state_table.bo, True);
965     assert(pp_context->sampler_state_table.bo->virtual);
966     sampler_state = pp_context->sampler_state_table.bo->virtual;
967
968     /* SIMD16 Y index 1 */
969     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
970     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
971     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
972     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
973     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
974
975     /* SIMD16 UV index 2 */
976     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
977     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
978     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
979     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
980     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
981
982     dri_bo_unmap(pp_context->sampler_state_table.bo);
983
984     /* private function & data */
985     pp_context->pp_x_steps = pp_scaling_x_steps;
986     pp_context->pp_y_steps = pp_scaling_y_steps;
987     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
988
989     pp_scaling_context->dest_x = dst_rect->x;
990     pp_scaling_context->dest_y = dst_rect->y;
991     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
992     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
993     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
994     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
995
996     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
997
998     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
999     pp_inline_parameter.grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1000     pp_inline_parameter.grf5.number_blocks = pp_scaling_context->dest_w / 16;
1001     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1002     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1003 }
1004
1005 static int
1006 pp_avs_x_steps(void *private_context)
1007 {
1008     struct pp_avs_context *pp_avs_context = private_context;
1009
1010     return pp_avs_context->dest_w / 16;
1011 }
1012
/*
 * pp_y_steps callback installed by pp_nv12_avs_initialize().
 * Always reports one vertical step; the private context is unused.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;      /* not needed: the step count is fixed */

    return 1;
}
1018
1019 static int
1020 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1021 {
1022     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1023     float src_x_steping, src_y_steping, video_step_delta;
1024     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1025
1026     if (tmp_w >= pp_avs_context->dest_w) {
1027         pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1028         pp_inline_parameter.grf6.video_step_delta = 0;
1029         
1030         if (x == 0) {
1031             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1032                 pp_avs_context->src_normalized_x;
1033         } else {
1034             src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1035             video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1036             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1037                 16 * 15 * video_step_delta / 2;
1038         }
1039     } else {
1040         int n0, n1, n2, nls_left, nls_right;
1041         int factor_a = 5, factor_b = 4;
1042         float f;
1043
1044         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1045         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1046         n2 = tmp_w / (16 * factor_a);
1047         nls_left = n0 + n2;
1048         nls_right = n1 + n2;
1049         f = (float) n2 * 16 / tmp_w;
1050         
1051         if (n0 < 5) {
1052             pp_inline_parameter.grf6.video_step_delta = 0.0;
1053
1054             if (x == 0) {
1055                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1056                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1057             } else {
1058                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1059                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1060                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1061                     16 * 15 * video_step_delta / 2;
1062             }
1063         } else {
1064             if (x < nls_left) {
1065                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1066                 float a = f / (nls_left * 16 * factor_b);
1067                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1068                 
1069                 pp_inline_parameter.grf6.video_step_delta = b;
1070
1071                 if (x == 0) {
1072                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1073                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
1074                 } else {
1075                     src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1076                     video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1077                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1078                         16 * 15 * video_step_delta / 2;
1079                     pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
1080                 }
1081             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1082                 /* scale the center linearly */
1083                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1084                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1085                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1086                     16 * 15 * video_step_delta / 2;
1087                 pp_inline_parameter.grf6.video_step_delta = 0.0;
1088                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1089             } else {
1090                 float a = f / (nls_right * 16 * factor_b);
1091                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1092
1093                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1094                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1095                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1096                     16 * 15 * video_step_delta / 2;
1097                 pp_inline_parameter.grf6.video_step_delta = -b;
1098
1099                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1100                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1101                 else
1102                     pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
1103             }
1104         }
1105     }
1106
1107     src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
1108     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
1109     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1110     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
1111
1112     return 0;
1113 }
1114
1115 static void
1116 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1117                        const struct i965_surface *src_surface,
1118                        const VARectangle *src_rect,
1119                        const struct i965_surface *dst_surface,
1120                        const VARectangle *dst_rect,
1121                        void *filter_param)
1122 {
1123     struct i965_driver_data *i965 = i965_driver_data(ctx);
1124     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1125     struct object_surface *obj_surface;
1126     struct i965_sampler_8x8 *sampler_8x8;
1127     struct i965_sampler_8x8_state *sampler_8x8_state;
1128     int index;
1129     int in_w, in_h, in_wpitch, in_hpitch;
1130     int out_w, out_h, out_wpitch, out_hpitch;
1131
1132     /* surface */
1133     obj_surface = SURFACE(src_surface->id);
1134     in_w = obj_surface->orig_width;
1135     in_h = obj_surface->orig_height;
1136     in_wpitch = obj_surface->width;
1137     in_hpitch = obj_surface->height;
1138
1139     /* source Y surface index 1 */
1140     i965_pp_set_surface2_state(ctx, pp_context,
1141                                obj_surface->bo, 0,
1142                                in_w, in_h, in_wpitch,
1143                                0, 0,
1144                                SURFACE_FORMAT_Y8_UNORM, 0,
1145                                1);
1146
1147     /* source UV surface index 2 */
1148     i965_pp_set_surface2_state(ctx, pp_context,
1149                                obj_surface->bo, in_wpitch * in_hpitch,
1150                                in_w, in_h, in_wpitch,
1151                                0, 0,
1152                                SURFACE_FORMAT_PLANAR_420_8, 1,
1153                                2);
1154
1155     /* destination surface */
1156     obj_surface = SURFACE(dst_surface->id);
1157     out_w = obj_surface->orig_width;
1158     out_h = obj_surface->orig_height;
1159     out_wpitch = obj_surface->width;
1160     out_hpitch = obj_surface->height;
1161     assert(out_w <= out_wpitch && out_h <= out_hpitch);
1162
1163     /* destination Y surface index 7 */
1164     i965_pp_set_surface_state(ctx, pp_context,
1165                               obj_surface->bo, 0,
1166                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1167                               7, 1);
1168
1169     /* destination UV surface index 8 */
1170     i965_pp_set_surface_state(ctx, pp_context,
1171                               obj_surface->bo, out_wpitch * out_hpitch,
1172                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1173                               8, 1);
1174
1175     /* sampler 8x8 state */
1176     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1177     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1178     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1179     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1180     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1181     sampler_8x8_state->dw136.default_sharpness_level = 0;
1182     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1183     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1184     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1185     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1186
1187     /* sampler 8x8 */
1188     dri_bo_map(pp_context->sampler_state_table.bo, True);
1189     assert(pp_context->sampler_state_table.bo->virtual);
1190     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1191     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1192
1193     /* sample_8x8 Y index 1 */
1194     index = 1;
1195     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1196     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1197     sampler_8x8[index].dw0.ief_bypass = 0;
1198     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1199     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1200     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1201     sampler_8x8[index].dw2.global_noise_estimation = 22;
1202     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1203     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1204     sampler_8x8[index].dw3.strong_edge_weight = 7;
1205     sampler_8x8[index].dw3.regular_weight = 2;
1206     sampler_8x8[index].dw3.non_edge_weight = 0;
1207     sampler_8x8[index].dw3.gain_factor = 40;
1208     sampler_8x8[index].dw4.steepness_boost = 0;
1209     sampler_8x8[index].dw4.steepness_threshold = 0;
1210     sampler_8x8[index].dw4.mr_boost = 0;
1211     sampler_8x8[index].dw4.mr_threshold = 5;
1212     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1213     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1214     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1215     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1216     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1217     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1218     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1219     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1220     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1221     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1222     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1223     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1224     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1225     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1226     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1227     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1228     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1229     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1230     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1231     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1232     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1233     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1234     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1235     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1236     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1237     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1238     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1239     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1240     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1241     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1242     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1243     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1244     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1245     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1246     sampler_8x8[index].dw13.limiter_boost = 0;
1247     sampler_8x8[index].dw13.minimum_limiter = 10;
1248     sampler_8x8[index].dw13.maximum_limiter = 11;
1249     sampler_8x8[index].dw14.clip_limiter = 130;
1250     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1251                       I915_GEM_DOMAIN_RENDER, 
1252                       0,
1253                       0,
1254                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1255                       pp_context->sampler_state_table.bo_8x8);
1256
1257     dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
1258     assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
1259     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1260     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
1261     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1262     sampler_8x8_state->dw136.default_sharpness_level = 0;
1263     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
1264     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1265     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1266     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
1267
1268     /* sample_8x8 UV index 2 */
1269     index = 2;
1270     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1271     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
1272     sampler_8x8[index].dw0.ief_bypass = 0;
1273     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1274     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1275     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
1276     sampler_8x8[index].dw2.global_noise_estimation = 22;
1277     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1278     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1279     sampler_8x8[index].dw3.strong_edge_weight = 7;
1280     sampler_8x8[index].dw3.regular_weight = 2;
1281     sampler_8x8[index].dw3.non_edge_weight = 0;
1282     sampler_8x8[index].dw3.gain_factor = 40;
1283     sampler_8x8[index].dw4.steepness_boost = 0;
1284     sampler_8x8[index].dw4.steepness_threshold = 0;
1285     sampler_8x8[index].dw4.mr_boost = 0;
1286     sampler_8x8[index].dw4.mr_threshold = 5;
1287     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1288     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1289     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1290     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1291     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1292     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1293     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1294     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1295     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1296     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1297     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1298     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1299     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1300     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1301     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1302     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1303     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1304     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1305     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1306     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1307     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1308     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1309     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1310     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1311     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1312     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1313     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1314     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1315     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1316     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1317     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1318     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1319     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1320     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1321     sampler_8x8[index].dw13.limiter_boost = 0;
1322     sampler_8x8[index].dw13.minimum_limiter = 10;
1323     sampler_8x8[index].dw13.maximum_limiter = 11;
1324     sampler_8x8[index].dw14.clip_limiter = 130;
1325     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1326                       I915_GEM_DOMAIN_RENDER, 
1327                       0,
1328                       0,
1329                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1330                       pp_context->sampler_state_table.bo_8x8_uv);
1331
1332     dri_bo_unmap(pp_context->sampler_state_table.bo);
1333
1334     /* private function & data */
1335     pp_context->pp_x_steps = pp_avs_x_steps;
1336     pp_context->pp_y_steps = pp_avs_y_steps;
1337     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1338
1339     pp_avs_context->dest_x = dst_rect->x;
1340     pp_avs_context->dest_y = dst_rect->y;
1341     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
1342     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
1343     pp_avs_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
1344     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
1345     pp_avs_context->src_w = src_rect->width;
1346     pp_avs_context->src_h = src_rect->height;
1347
1348     pp_static_parameter.grf4.r4_2.avs.nlas = 1;
1349     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
1350
1351     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
1352     pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
1353     pp_inline_parameter.grf5.number_blocks = pp_avs_context->dest_h / 8;
1354     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1355     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1356     pp_inline_parameter.grf6.video_step_delta = 0.0;
1357 }
1358
/*
 * pp_x_steps callback installed by pp_nv12_dndi_initialize().
 * Always reports one horizontal step; the private context is unused.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;      /* not needed: the step count is fixed */

    return 1;
}
1364
1365 static int
1366 pp_dndi_y_steps(void *private_context)
1367 {
1368     struct pp_dndi_context *pp_dndi_context = private_context;
1369
1370     return pp_dndi_context->dest_h / 4;
1371 }
1372
1373 static int
1374 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1375 {
1376     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1377     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1378
1379     return 0;
1380 }
1381
1382 static 
1383 void pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1384                              const struct i965_surface *src_surface,
1385                              const VARectangle *src_rect,
1386                              const struct i965_surface *dst_surface,
1387                              const VARectangle *dst_rect,
1388                              void *filter_param)
1389 {
1390     struct i965_driver_data *i965 = i965_driver_data(ctx);
1391     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1392     struct object_surface *obj_surface;
1393     struct i965_sampler_dndi *sampler_dndi;
1394     VAProcFilterBaseParameterBuffer *dndi_filter_param = filter_param;
1395     int index;
1396     int w, h;
1397     int orig_w, orig_h;
1398     int dn_strength = 15;
1399
1400     if (dndi_filter_param) {
1401         int value = dndi_filter_param->value;
1402         
1403         if (value > 1.0)
1404             value = 1.0;
1405         
1406         if (value < 0.0)
1407             value = 0.0;
1408
1409         dn_strength = (int)(value * 31.0F);
1410     }
1411
1412     /* surface */
1413     obj_surface = SURFACE(src_surface->id);
1414     orig_w = obj_surface->orig_width;
1415     orig_h = obj_surface->orig_height;
1416     w = obj_surface->width;
1417     h = obj_surface->height;
1418
1419     if (pp_context->stmm.bo == NULL) {
1420         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1421                                            "STMM surface",
1422                                            w * h,
1423                                            4096);
1424         assert(pp_context->stmm.bo);
1425     }
1426
1427     /* source UV surface index 2 */
1428     i965_pp_set_surface_state(ctx, pp_context,
1429                               obj_surface->bo, w * h,
1430                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
1431                               2, 0);
1432
1433     /* source YUV surface index 4 */
1434     i965_pp_set_surface2_state(ctx, pp_context,
1435                                obj_surface->bo, 0,
1436                                orig_w, orig_w, w,
1437                                0, h,
1438                                SURFACE_FORMAT_PLANAR_420_8, 1,
1439                                4);
1440
1441     /* source STMM surface index 20 */
1442     i965_pp_set_surface_state(ctx, pp_context,
1443                               pp_context->stmm.bo, 0,
1444                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
1445                               20, 1);
1446
1447     /* destination surface */
1448     obj_surface = SURFACE(dst_surface->id);
1449     orig_w = obj_surface->orig_width;
1450     orig_h = obj_surface->orig_height;
1451     w = obj_surface->width;
1452     h = obj_surface->height;
1453
1454     /* destination Y surface index 7 */
1455     i965_pp_set_surface_state(ctx, pp_context,
1456                               obj_surface->bo, 0,
1457                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
1458                               7, 1);
1459
1460     /* destination UV surface index 8 */
1461     i965_pp_set_surface_state(ctx, pp_context,
1462                               obj_surface->bo, w * h,
1463                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
1464                               8, 1);
1465     /* sampler dndi */
1466     dri_bo_map(pp_context->sampler_state_table.bo, True);
1467     assert(pp_context->sampler_state_table.bo->virtual);
1468     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1469     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1470
1471     /* sample dndi index 1 */
1472     index = 0;
1473     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1474     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1475     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1476     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1477
1478     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1479     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1480     sampler_dndi[index].dw1.stmm_c2 = 0;
1481     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1482     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1483
1484     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
1485     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1486     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1487     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1488
1489     sampler_dndi[index].dw3.maximum_stmm = 128;
1490     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1491     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1492     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1493     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1494
1495     sampler_dndi[index].dw4.sdi_delta = 8;
1496     sampler_dndi[index].dw4.sdi_threshold = 128;
1497     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1498     sampler_dndi[index].dw4.stmm_shift_up = 0;
1499     sampler_dndi[index].dw4.stmm_shift_down = 0;
1500     sampler_dndi[index].dw4.minimum_stmm = 0;
1501
1502     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1503     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1504     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1505     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1506
1507     sampler_dndi[index].dw6.dn_enable = 1;
1508     sampler_dndi[index].dw6.di_enable = 1;
1509     sampler_dndi[index].dw6.di_partial = 0;
1510     sampler_dndi[index].dw6.dndi_top_first = 1;
1511     sampler_dndi[index].dw6.dndi_stream_id = 1;
1512     sampler_dndi[index].dw6.dndi_first_frame = 1;
1513     sampler_dndi[index].dw6.progressive_dn = 0;
1514     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1515     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1516     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1517
1518     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1519     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1520     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1521     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1522
1523     dri_bo_unmap(pp_context->sampler_state_table.bo);
1524
1525     /* private function & data */
1526     pp_context->pp_x_steps = pp_dndi_x_steps;
1527     pp_context->pp_y_steps = pp_dndi_y_steps;
1528     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1529
1530     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1531     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1532     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1533     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1534
1535     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1536     pp_inline_parameter.grf5.number_blocks = w / 16;
1537     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1538     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1539
1540     pp_dndi_context->dest_w = w;
1541     pp_dndi_context->dest_h = h;
1542 }
1543
1544 static void
1545 ironlake_pp_initialize(
1546     VADriverContextP   ctx,
1547     struct i965_post_processing_context *pp_context,
1548     const struct i965_surface *src_surface,
1549     const VARectangle *src_rect,
1550     const struct i965_surface *dst_surface,
1551     const VARectangle *dst_rect,
1552     int                pp_index,
1553     void *filter_param
1554 )
1555 {
1556     struct i965_driver_data *i965 = i965_driver_data(ctx);
1557     struct pp_module *pp_module;
1558     dri_bo *bo;
1559
1560     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
1561     bo = dri_bo_alloc(i965->intel.bufmgr,
1562                       "surface state & binding table",
1563                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
1564                       4096);
1565     assert(bo);
1566     pp_context->surface_state_binding_table.bo = bo;
1567
1568     dri_bo_unreference(pp_context->curbe.bo);
1569     bo = dri_bo_alloc(i965->intel.bufmgr,
1570                       "constant buffer",
1571                       4096, 
1572                       4096);
1573     assert(bo);
1574     pp_context->curbe.bo = bo;
1575
1576     dri_bo_unreference(pp_context->idrt.bo);
1577     bo = dri_bo_alloc(i965->intel.bufmgr, 
1578                       "interface discriptor", 
1579                       sizeof(struct i965_interface_descriptor), 
1580                       4096);
1581     assert(bo);
1582     pp_context->idrt.bo = bo;
1583     pp_context->idrt.num_interface_descriptors = 0;
1584
1585     dri_bo_unreference(pp_context->sampler_state_table.bo);
1586     bo = dri_bo_alloc(i965->intel.bufmgr, 
1587                       "sampler state table", 
1588                       4096,
1589                       4096);
1590     assert(bo);
1591     dri_bo_map(bo, True);
1592     memset(bo->virtual, 0, bo->size);
1593     dri_bo_unmap(bo);
1594     pp_context->sampler_state_table.bo = bo;
1595
1596     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1597     bo = dri_bo_alloc(i965->intel.bufmgr, 
1598                       "sampler 8x8 state ",
1599                       4096,
1600                       4096);
1601     assert(bo);
1602     pp_context->sampler_state_table.bo_8x8 = bo;
1603
1604     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1605     bo = dri_bo_alloc(i965->intel.bufmgr, 
1606                       "sampler 8x8 state ",
1607                       4096,
1608                       4096);
1609     assert(bo);
1610     pp_context->sampler_state_table.bo_8x8_uv = bo;
1611
1612     dri_bo_unreference(pp_context->vfe_state.bo);
1613     bo = dri_bo_alloc(i965->intel.bufmgr, 
1614                       "vfe state", 
1615                       sizeof(struct i965_vfe_state), 
1616                       4096);
1617     assert(bo);
1618     pp_context->vfe_state.bo = bo;
1619     
1620     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1621     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1622     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1623     pp_context->current_pp = pp_index;
1624     pp_module = &pp_context->pp_modules[pp_index];
1625     
1626     if (pp_module->initialize)
1627         pp_module->initialize(ctx, pp_context,
1628                               src_surface,
1629                               src_rect,
1630                               dst_surface,
1631                               dst_rect,
1632                               filter_param);
1633 }
1634
1635 static void
1636 ironlake_post_processing(
1637     VADriverContextP   ctx,
1638     struct i965_post_processing_context *pp_context,
1639     const struct i965_surface *src_surface,
1640     const VARectangle *src_rect,
1641     const struct i965_surface *dst_surface,
1642     const VARectangle *dst_rect,
1643     int                pp_index,
1644     void *filter_param
1645 )
1646 {
1647     ironlake_pp_initialize(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
1648     ironlake_pp_states_setup(ctx, pp_context);
1649     ironlake_pp_pipeline_setup(ctx, pp_context);
1650 }
1651
1652 static void
1653 gen6_pp_initialize(
1654     VADriverContextP   ctx,
1655     struct i965_post_processing_context *pp_context,
1656     const struct i965_surface *src_surface,
1657     const VARectangle *src_rect,
1658     const struct i965_surface *dst_surface,
1659     const VARectangle *dst_rect,
1660     int                pp_index,
1661     void *filter_param
1662 )
1663 {
1664     struct i965_driver_data *i965 = i965_driver_data(ctx);
1665     struct pp_module *pp_module;
1666     dri_bo *bo;
1667
1668     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
1669     bo = dri_bo_alloc(i965->intel.bufmgr,
1670                       "surface state & binding table",
1671                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
1672                       4096);
1673     assert(bo);
1674     pp_context->surface_state_binding_table.bo = bo;
1675
1676     dri_bo_unreference(pp_context->curbe.bo);
1677     bo = dri_bo_alloc(i965->intel.bufmgr,
1678                       "constant buffer",
1679                       4096, 
1680                       4096);
1681     assert(bo);
1682     pp_context->curbe.bo = bo;
1683
1684     dri_bo_unreference(pp_context->idrt.bo);
1685     bo = dri_bo_alloc(i965->intel.bufmgr, 
1686                       "interface discriptor", 
1687                       sizeof(struct gen6_interface_descriptor_data), 
1688                       4096);
1689     assert(bo);
1690     pp_context->idrt.bo = bo;
1691     pp_context->idrt.num_interface_descriptors = 0;
1692
1693     dri_bo_unreference(pp_context->sampler_state_table.bo);
1694     bo = dri_bo_alloc(i965->intel.bufmgr, 
1695                       "sampler state table", 
1696                       4096,
1697                       4096);
1698     assert(bo);
1699     dri_bo_map(bo, True);
1700     memset(bo->virtual, 0, bo->size);
1701     dri_bo_unmap(bo);
1702     pp_context->sampler_state_table.bo = bo;
1703
1704     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1705     bo = dri_bo_alloc(i965->intel.bufmgr, 
1706                       "sampler 8x8 state ",
1707                       4096,
1708                       4096);
1709     assert(bo);
1710     pp_context->sampler_state_table.bo_8x8 = bo;
1711
1712     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1713     bo = dri_bo_alloc(i965->intel.bufmgr, 
1714                       "sampler 8x8 state ",
1715                       4096,
1716                       4096);
1717     assert(bo);
1718     pp_context->sampler_state_table.bo_8x8_uv = bo;
1719
1720     dri_bo_unreference(pp_context->vfe_state.bo);
1721     bo = dri_bo_alloc(i965->intel.bufmgr, 
1722                       "vfe state", 
1723                       sizeof(struct i965_vfe_state), 
1724                       4096);
1725     assert(bo);
1726     pp_context->vfe_state.bo = bo;
1727     
1728     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1729     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1730     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1731     pp_context->current_pp = pp_index;
1732     pp_module = &pp_context->pp_modules[pp_index];
1733     
1734     if (pp_module->initialize)
1735         pp_module->initialize(ctx, pp_context,
1736                               src_surface,
1737                               src_rect,
1738                               dst_surface,
1739                               dst_rect,
1740                               filter_param);
1741 }
1742
1743 static void
1744 gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1745 {
1746     struct gen6_interface_descriptor_data *desc;
1747     dri_bo *bo;
1748     int pp_index = pp_context->current_pp;
1749
1750     bo = pp_context->idrt.bo;
1751     dri_bo_map(bo, True);
1752     assert(bo->virtual);
1753     desc = bo->virtual;
1754     memset(desc, 0, sizeof(*desc));
1755     desc->desc0.kernel_start_pointer = 
1756         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1757     desc->desc1.single_program_flow = 1;
1758     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
1759     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
1760     desc->desc2.sampler_state_pointer = 
1761         pp_context->sampler_state_table.bo->offset >> 5;
1762     desc->desc3.binding_table_entry_count = 0;
1763     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1764     desc->desc4.constant_urb_entry_read_offset = 0;
1765     desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
1766
1767     dri_bo_emit_reloc(bo,
1768                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1769                       0,
1770                       offsetof(struct gen6_interface_descriptor_data, desc0),
1771                       pp_context->pp_modules[pp_index].kernel.bo);
1772
1773     dri_bo_emit_reloc(bo,
1774                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1775                       desc->desc2.sampler_count << 2,
1776                       offsetof(struct gen6_interface_descriptor_data, desc2),
1777                       pp_context->sampler_state_table.bo);
1778
1779     dri_bo_unmap(bo);
1780     pp_context->idrt.num_interface_descriptors++;
1781 }
1782
1783 static void
1784 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
1785 {
1786     unsigned char *constant_buffer;
1787
1788     assert(sizeof(pp_static_parameter) == 128);
1789     dri_bo_map(pp_context->curbe.bo, 1);
1790     assert(pp_context->curbe.bo->virtual);
1791     constant_buffer = pp_context->curbe.bo->virtual;
1792     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
1793     dri_bo_unmap(pp_context->curbe.bo);
1794 }
1795
1796 static void
1797 gen6_pp_states_setup(VADriverContextP ctx,
1798                      struct i965_post_processing_context *pp_context)
1799 {
1800     gen6_pp_interface_descriptor_table(pp_context);
1801     gen6_pp_upload_constants(pp_context);
1802 }
1803
1804 static void
1805 gen6_pp_pipeline_select(VADriverContextP ctx,
1806                         struct i965_post_processing_context *pp_context)
1807 {
1808     struct intel_batchbuffer *batch = pp_context->batch;
1809
1810     BEGIN_BATCH(batch, 1);
1811     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1812     ADVANCE_BATCH(batch);
1813 }
1814
1815 static void
1816 gen6_pp_state_base_address(VADriverContextP ctx,
1817                            struct i965_post_processing_context *pp_context)
1818 {
1819     struct intel_batchbuffer *batch = pp_context->batch;
1820
1821     BEGIN_BATCH(batch, 10);
1822     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1823     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1824     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1825     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1826     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1827     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1828     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1829     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1830     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1831     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1832     ADVANCE_BATCH(batch);
1833 }
1834
1835 static void
1836 gen6_pp_vfe_state(VADriverContextP ctx,
1837                   struct i965_post_processing_context *pp_context)
1838 {
1839     struct intel_batchbuffer *batch = pp_context->batch;
1840
1841     BEGIN_BATCH(batch, 8);
1842     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
1843     OUT_BATCH(batch, 0);
1844     OUT_BATCH(batch,
1845               (pp_context->urb.num_vfe_entries - 1) << 16 |
1846               pp_context->urb.num_vfe_entries << 8);
1847     OUT_BATCH(batch, 0);
1848     OUT_BATCH(batch,
1849               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
1850               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
1851     OUT_BATCH(batch, 0);
1852     OUT_BATCH(batch, 0);
1853     OUT_BATCH(batch, 0);
1854     ADVANCE_BATCH(batch);
1855 }
1856
1857 static void
1858 gen6_pp_curbe_load(VADriverContextP ctx,
1859                    struct i965_post_processing_context *pp_context)
1860 {
1861     struct intel_batchbuffer *batch = pp_context->batch;
1862
1863     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);
1864
1865     BEGIN_BATCH(batch, 4);
1866     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
1867     OUT_BATCH(batch, 0);
1868     OUT_BATCH(batch,
1869               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
1870     OUT_RELOC(batch, 
1871               pp_context->curbe.bo,
1872               I915_GEM_DOMAIN_INSTRUCTION, 0,
1873               0);
1874     ADVANCE_BATCH(batch);
1875 }
1876
1877 static void
1878 gen6_interface_descriptor_load(VADriverContextP ctx,
1879                                struct i965_post_processing_context *pp_context)
1880 {
1881     struct intel_batchbuffer *batch = pp_context->batch;
1882
1883     BEGIN_BATCH(batch, 4);
1884     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
1885     OUT_BATCH(batch, 0);
1886     OUT_BATCH(batch,
1887               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
1888     OUT_RELOC(batch, 
1889               pp_context->idrt.bo,
1890               I915_GEM_DOMAIN_INSTRUCTION, 0,
1891               0);
1892     ADVANCE_BATCH(batch);
1893 }
1894
1895 static void
1896 gen6_pp_object_walker(VADriverContextP ctx,
1897                       struct i965_post_processing_context *pp_context)
1898 {
1899     struct intel_batchbuffer *batch = pp_context->batch;
1900     int x, x_steps, y, y_steps;
1901
1902     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1903     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1904
1905     for (y = 0; y < y_steps; y++) {
1906         for (x = 0; x < x_steps; x++) {
1907             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1908                 BEGIN_BATCH(batch, 22);
1909                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20);
1910                 OUT_BATCH(batch, 0);
1911                 OUT_BATCH(batch, 0); /* no indirect data */
1912                 OUT_BATCH(batch, 0);
1913                 OUT_BATCH(batch, 0); /* scoreboard */
1914                 OUT_BATCH(batch, 0);
1915
1916                 /* inline data grf 5-6 */
1917                 assert(sizeof(pp_inline_parameter) == 64);
1918                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
1919
1920                 ADVANCE_BATCH(batch);
1921             }
1922         }
1923     }
1924 }
1925
1926 static void
1927 gen6_pp_pipeline_setup(VADriverContextP ctx,
1928                        struct i965_post_processing_context *pp_context)
1929 {
1930     struct intel_batchbuffer *batch = pp_context->batch;
1931
1932     intel_batchbuffer_start_atomic(batch, 0x1000);
1933     intel_batchbuffer_emit_mi_flush(batch);
1934     gen6_pp_pipeline_select(ctx, pp_context);
1935     gen6_pp_state_base_address(ctx, pp_context);
1936     gen6_pp_curbe_load(ctx, pp_context);
1937     gen6_interface_descriptor_load(ctx, pp_context);
1938     gen6_pp_vfe_state(ctx, pp_context);
1939     gen6_pp_object_walker(ctx, pp_context);
1940     intel_batchbuffer_end_atomic(batch);
1941 }
1942
1943 static void
1944 gen6_post_processing(
1945     VADriverContextP   ctx,
1946     struct i965_post_processing_context *pp_context,
1947     const struct i965_surface *src_surface,
1948     const VARectangle *src_rect,
1949     const struct i965_surface *dst_surface,
1950     const VARectangle *dst_rect,
1951     int                pp_index,
1952     void * filter_param
1953 )
1954 {
1955     gen6_pp_initialize(ctx, pp_context,
1956                        src_surface,
1957                        src_rect,
1958                        dst_surface,
1959                        dst_rect,
1960                        pp_index,
1961                        filter_param);
1962     gen6_pp_states_setup(ctx, pp_context);
1963     gen6_pp_pipeline_setup(ctx, pp_context);
1964 }
1965
1966 static void
1967 i965_post_processing_internal(
1968     VADriverContextP   ctx,
1969     struct i965_post_processing_context *pp_context,
1970     const struct i965_surface *src_surface,
1971     const VARectangle *src_rect,
1972     const struct i965_surface *dst_surface,
1973     const VARectangle *dst_rect,
1974     int                pp_index,
1975     void *filter_param
1976 )
1977 {
1978     struct i965_driver_data *i965 = i965_driver_data(ctx);
1979
1980     if (IS_GEN6(i965->intel.device_id) ||
1981         IS_GEN7(i965->intel.device_id))
1982         gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
1983     else
1984         ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
1985 }
1986
1987 VAStatus 
1988 i965_DestroySurfaces(VADriverContextP ctx,
1989                      VASurfaceID *surface_list,
1990                      int num_surfaces);
1991 VAStatus 
1992 i965_CreateSurfaces(VADriverContextP ctx,
1993                     int width,
1994                     int height,
1995                     int format,
1996                     int num_surfaces,
1997                     VASurfaceID *surfaces);
1998 VASurfaceID
1999 i965_post_processing(
2000     VADriverContextP   ctx,
2001     VASurfaceID        surface,
2002     const VARectangle *src_rect,
2003     const VARectangle *dst_rect,
2004     unsigned int       flags,
2005     int               *has_done_scaling  
2006 )
2007 {
2008     struct i965_driver_data *i965 = i965_driver_data(ctx);
2009     VASurfaceID in_surface_id = surface;
2010     VASurfaceID out_surface_id = VA_INVALID_ID;
2011     
2012     *has_done_scaling = 0;
2013
2014     if (HAS_PP(i965)) {
2015         struct object_surface *obj_surface;
2016         VAStatus status;
2017         struct i965_surface src_surface;
2018         struct i965_surface dst_surface;
2019
2020         obj_surface = SURFACE(in_surface_id);
2021
2022         /* Currently only support post processing for NV12 surface */
2023         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
2024             return out_surface_id;
2025
2026         if (flags & I965_PP_FLAG_DEINTERLACING) {
2027             status = i965_CreateSurfaces(ctx,
2028                                          obj_surface->orig_width,
2029                                          obj_surface->orig_height,
2030                                          VA_RT_FORMAT_YUV420,
2031                                          1,
2032                                          &out_surface_id);
2033             assert(status == VA_STATUS_SUCCESS);
2034             obj_surface = SURFACE(out_surface_id);
2035             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2036
2037             src_surface.id = in_surface_id;
2038             src_surface.flag = I965_SURFACE_SURFACE;
2039             dst_surface.id = out_surface_id;
2040             dst_surface.flag = I965_SURFACE_SURFACE;
2041
2042             i965_post_processing_internal(ctx, i965->pp_context,
2043                                           &src_surface,
2044                                           src_rect,
2045                                           &dst_surface,
2046                                           dst_rect,
2047                                           PP_NV12_DNDI,
2048                                           NULL);
2049         }
2050
2051         if (flags & I965_PP_FLAG_AVS) {
2052             struct i965_render_state *render_state = &i965->render_state;
2053             struct intel_region *dest_region = render_state->draw_region;
2054
2055             if (out_surface_id != VA_INVALID_ID)
2056                 in_surface_id = out_surface_id;
2057
2058             status = i965_CreateSurfaces(ctx,
2059                                          dest_region->width,
2060                                          dest_region->height,
2061                                          VA_RT_FORMAT_YUV420,
2062                                          1,
2063                                          &out_surface_id);
2064             assert(status == VA_STATUS_SUCCESS);
2065             obj_surface = SURFACE(out_surface_id);
2066             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2067
2068             src_surface.id = in_surface_id;
2069             src_surface.flag = I965_SURFACE_SURFACE;
2070             dst_surface.id = out_surface_id;
2071             dst_surface.flag = I965_SURFACE_SURFACE;
2072
2073             i965_post_processing_internal(ctx, i965->pp_context,
2074                                           &src_surface,
2075                                           src_rect,
2076                                           &dst_surface,
2077                                           dst_rect,
2078                                           PP_NV12_AVS,
2079                                           NULL);
2080
2081             if (in_surface_id != surface)
2082                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
2083                 
2084             *has_done_scaling = 1;
2085         }
2086     }
2087
2088     return out_surface_id;
2089 }       
2090
2091 static VAStatus
2092 i965_image_i420_processing(VADriverContextP ctx,
2093                            const struct i965_surface *src_surface,
2094                            const VARectangle *src_rect,
2095                            const struct i965_surface *dst_surface,
2096                            const VARectangle *dst_rect)
2097 {
2098     struct i965_driver_data *i965 = i965_driver_data(ctx);
2099     struct i965_post_processing_context *pp_context = i965->pp_context;
2100     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
2101
2102     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
2103         i965_post_processing_internal(ctx, i965->pp_context,
2104                                       src_surface,
2105                                       src_rect,
2106                                       dst_surface,
2107                                       dst_rect,
2108                                       PP_PL3_LOAD_SAVE_N12,
2109                                       NULL);
2110     } else {
2111         i965_post_processing_internal(ctx, i965->pp_context,
2112                                       src_surface,
2113                                       src_rect,
2114                                       dst_surface,
2115                                       dst_rect,
2116                                       PP_PL3_LOAD_SAVE_PL3,
2117                                       NULL);
2118     }
2119
2120     intel_batchbuffer_flush(pp_context->batch);
2121
2122     return VA_STATUS_SUCCESS;
2123 }
2124
2125 static VAStatus
2126 i965_image_nv12_processing(VADriverContextP ctx,
2127                            const struct i965_surface *src_surface,
2128                            const VARectangle *src_rect,
2129                            const struct i965_surface *dst_surface,
2130                            const VARectangle *dst_rect)
2131 {
2132     struct i965_driver_data *i965 = i965_driver_data(ctx);
2133     struct i965_post_processing_context *pp_context = i965->pp_context;
2134     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
2135
2136     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
2137         i965_post_processing_internal(ctx, i965->pp_context,
2138                                       src_surface,
2139                                       src_rect,
2140                                       dst_surface,
2141                                       dst_rect,
2142                                       PP_NV12_LOAD_SAVE_N12,
2143                                       NULL);
2144     } else {
2145         i965_post_processing_internal(ctx, i965->pp_context,
2146                                       src_surface,
2147                                       src_rect,
2148                                       dst_surface,
2149                                       dst_rect,
2150                                       PP_NV12_LOAD_SAVE_PL3,
2151                                       NULL);
2152     }
2153
2154     intel_batchbuffer_flush(pp_context->batch);
2155
2156     return VA_STATUS_SUCCESS;
2157 }
2158
2159 VAStatus
2160 i965_image_processing(VADriverContextP ctx,
2161                       const struct i965_surface *src_surface,
2162                       const VARectangle *src_rect,
2163                       const struct i965_surface *dst_surface,
2164                       const VARectangle *dst_rect)
2165 {
2166     struct i965_driver_data *i965 = i965_driver_data(ctx);
2167     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
2168
2169     if (HAS_PP(i965)) {
2170         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
2171
2172         switch (fourcc) {
2173         case VA_FOURCC('Y', 'V', '1', '2'):
2174         case VA_FOURCC('I', '4', '2', '0'):
2175             status = i965_image_i420_processing(ctx,
2176                                                 src_surface,
2177                                                 src_rect,
2178                                                 dst_surface,
2179                                                 dst_rect);
2180             break;
2181
2182         case  VA_FOURCC('N', 'V', '1', '2'):
2183             status = i965_image_nv12_processing(ctx,
2184                                                 src_surface,
2185                                                 src_rect,
2186                                                 dst_surface,
2187                                                 dst_rect);
2188             break;
2189
2190         default:
2191             status = VA_STATUS_ERROR_UNIMPLEMENTED;
2192             break;
2193         }
2194     }
2195
2196     return status;
2197 }       
2198
2199 static void
2200 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
2201 {
2202     int i;
2203
2204     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
2205     pp_context->surface_state_binding_table.bo = NULL;
2206
2207     dri_bo_unreference(pp_context->curbe.bo);
2208     pp_context->curbe.bo = NULL;
2209
2210     dri_bo_unreference(pp_context->sampler_state_table.bo);
2211     pp_context->sampler_state_table.bo = NULL;
2212
2213     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2214     pp_context->sampler_state_table.bo_8x8 = NULL;
2215
2216     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2217     pp_context->sampler_state_table.bo_8x8_uv = NULL;
2218
2219     dri_bo_unreference(pp_context->idrt.bo);
2220     pp_context->idrt.bo = NULL;
2221     pp_context->idrt.num_interface_descriptors = 0;
2222
2223     dri_bo_unreference(pp_context->vfe_state.bo);
2224     pp_context->vfe_state.bo = NULL;
2225
2226     dri_bo_unreference(pp_context->stmm.bo);
2227     pp_context->stmm.bo = NULL;
2228
2229     for (i = 0; i < NUM_PP_MODULES; i++) {
2230         struct pp_module *pp_module = &pp_context->pp_modules[i];
2231
2232         dri_bo_unreference(pp_module->kernel.bo);
2233         pp_module->kernel.bo = NULL;
2234     }
2235
2236 }
2237
2238 Bool
2239 i965_post_processing_terminate(VADriverContextP ctx)
2240 {
2241     struct i965_driver_data *i965 = i965_driver_data(ctx);
2242     struct i965_post_processing_context *pp_context = i965->pp_context;
2243
2244     if (pp_context) {
2245         i965_post_processing_context_finalize(pp_context);
2246         free(pp_context);
2247     }
2248
2249     i965->pp_context = NULL;
2250
2251     return True;
2252 }
2253
2254 static void
2255 i965_post_processing_context_init(VADriverContextP ctx,
2256                                   struct i965_post_processing_context *pp_context,
2257                                   struct intel_batchbuffer *batch)
2258 {
2259     struct i965_driver_data *i965 = i965_driver_data(ctx);
2260     int i;
2261
2262     pp_context->urb.size = URB_SIZE((&i965->intel));
2263     pp_context->urb.num_vfe_entries = 32;
2264     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
2265     pp_context->urb.num_cs_entries = 1;
2266     pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
2267     pp_context->urb.vfe_start = 0;
2268     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2269         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2270     assert(pp_context->urb.cs_start + 
2271            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2272
2273     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
2274     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2275
2276     if (IS_GEN6(i965->intel.device_id) ||
2277         IS_GEN7(i965->intel.device_id))
2278         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
2279     else if (IS_IRONLAKE(i965->intel.device_id))
2280         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
2281
2282     for (i = 0; i < NUM_PP_MODULES; i++) {
2283         struct pp_module *pp_module = &pp_context->pp_modules[i];
2284         dri_bo_unreference(pp_module->kernel.bo);
2285         if (pp_module->kernel.bin) {
2286             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
2287                                                 pp_module->kernel.name,
2288                                                 pp_module->kernel.size,
2289                                                 4096);
2290             assert(pp_module->kernel.bo);
2291             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
2292         } else {
2293             pp_module->kernel.bo = NULL;
2294         }
2295     }
2296
2297     pp_context->batch = batch;
2298 }
2299
2300 Bool
2301 i965_post_processing_init(VADriverContextP ctx)
2302 {
2303     struct i965_driver_data *i965 = i965_driver_data(ctx);
2304     struct i965_post_processing_context *pp_context = i965->pp_context;
2305
2306     if (HAS_PP(i965)) {
2307         if (pp_context == NULL) {
2308             pp_context = calloc(1, sizeof(*pp_context));
2309             i965_post_processing_context_init(ctx, pp_context, i965->batch);
2310             i965->pp_context = pp_context;
2311         }
2312     }
2313
2314     return True;
2315 }
2316
2317 static const int procfilter_to_pp_flag[10] = {
2318     PP_NULL,    /* VAProcFilterNone */
2319     PP_NULL,    /* VAProcFilterDering */
2320     PP_NULL,    /* VAProcFilterDeblocking */
2321     PP_NV12_DNDI, /* VAProcFilterNoiseReduction */
2322     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
2323     PP_NULL,    /* VAProcFilterSharpening */
2324     PP_NULL,    /* VAProcFilterColorEnhancement */
2325     PP_NULL,    /* VAProcFilterProcAmp */
2326     PP_NULL,    /* VAProcFilterComposition */
2327     PP_NULL,    /* VAProcFilterFrameRateConversion */
2328 };
2329
2330 static void 
2331 i965_proc_picture(VADriverContextP ctx, 
2332                   VAProfile profile, 
2333                   union codec_state *codec_state,
2334                   struct hw_context *hw_context)
2335 {
2336     struct i965_driver_data *i965 = i965_driver_data(ctx);
2337     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
2338     struct proc_state *proc_state = &codec_state->proc;
2339     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
2340     VAProcInputParameterBuffer *input_param = (VAProcInputParameterBuffer *)proc_state->input_param->buffer;
2341     struct object_surface *obj_surface;
2342     struct i965_surface src_surface, dst_surface;
2343     VAStatus status;
2344     int i;
2345     VASurfaceID tmp_surfaces[VA_PROC_PIPELINE_MAX_NUM_FILTERS];
2346     int num_tmp_surfaces = 0;
2347
2348     assert(input_param->surface != VA_INVALID_ID);
2349     assert(proc_state->current_render_target != VA_INVALID_ID);
2350
2351     obj_surface = SURFACE(proc_state->current_render_target);
2352     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2353
2354     obj_surface = SURFACE(input_param->surface);
2355     assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
2356
2357     src_surface.id = input_param->surface;
2358     src_surface.flag = I965_SURFACE_SURFACE;
2359     
2360     for (i = 0; i < VA_PROC_PIPELINE_MAX_NUM_FILTERS; i++) {
2361         VAProcFilterType filter_type = pipeline_param->filter_pipeline[i];
2362         VASurfaceID out_surface_id = VA_INVALID_ID;
2363         void *filter_param = NULL;
2364
2365         if (procfilter_to_pp_flag[filter_type] != PP_NULL) {
2366             if (proc_state->filter_param[filter_type])
2367                 filter_param = proc_state->filter_param[filter_type]->buffer;
2368
2369             status = i965_CreateSurfaces(ctx,
2370                                          obj_surface->orig_width,
2371                                          obj_surface->orig_height,
2372                                          VA_RT_FORMAT_YUV420,
2373                                          1,
2374                                          &out_surface_id);
2375             assert(status == VA_STATUS_SUCCESS);
2376             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
2377             obj_surface = SURFACE(out_surface_id);
2378             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2379             dst_surface.id = out_surface_id;
2380             dst_surface.flag = I965_SURFACE_SURFACE;
2381             i965_post_processing_internal(ctx, &proc_context->pp_context,
2382                                           &src_surface,
2383                                           &input_param->region,
2384                                           &dst_surface,
2385                                           &input_param->region,
2386                                           procfilter_to_pp_flag[filter_type],
2387                                           filter_param);
2388             src_surface.id = dst_surface.id;
2389         }
2390     }
2391
2392     dst_surface.id = proc_state->current_render_target;
2393     dst_surface.flag = I965_SURFACE_SURFACE;
2394     i965_post_processing_internal(ctx, &proc_context->pp_context,
2395                                   &src_surface,
2396                                   &input_param->region,
2397                                   &dst_surface,
2398                                   &pipeline_param->output_region,
2399                                   PP_NV12_AVS,
2400                                   NULL);
2401
2402     if (num_tmp_surfaces)
2403         i965_DestroySurfaces(ctx,
2404                              tmp_surfaces,
2405                              num_tmp_surfaces);
2406
2407     intel_batchbuffer_flush(hw_context->batch);
2408 }
2409
2410 static void
2411 i965_proc_context_destroy(void *hw_context)
2412 {
2413     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
2414
2415     i965_post_processing_context_finalize(&proc_context->pp_context);
2416     intel_batchbuffer_free(proc_context->base.batch);
2417     free(proc_context);
2418 }
2419
2420 struct hw_context *
2421 i965_proc_context_init(VADriverContextP ctx, VAProfile profile)
2422 {
2423     struct intel_driver_data *intel = intel_driver_data(ctx);
2424     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
2425
2426     proc_context->base.destroy = i965_proc_context_destroy;
2427     proc_context->base.run = i965_proc_picture;
2428     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
2429     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
2430
2431     return (struct hw_context *)proc_context;
2432 }