i965_drv_video: emit base address command before other commands
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
40 #include "i965_drv_video.h"
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
43
/*
 * Non-zero when the device id stored in ctx->intel.device_id is matched by
 * IS_IRONLAKE, IS_GEN6 or IS_GEN7.
 * NOTE(review): presumably "has post-processing support" - confirm with callers.
 */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))
47
/*
 * Layout of the buffer that holds both the surface states and the binding
 * table.  Every surface-state slot has the same size: the larger of the two
 * surface-state structures, each passed through ALIGN(..., 32).  The binding
 * table is placed directly after MAX_PP_SURFACES slots.
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_I965
/* The argument is parenthesized so that an expression such as "base + 1" is scaled as a whole. */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
55
/*
 * Kernel tables for the gen5 module list.  Each array is a list of
 * four-dword rows whose contents are pulled in verbatim from a generated
 * file under shaders/post_processing/gen5_6/ (".g4b.gen5" variants).
 * NOTE(review): presumably pre-assembled GPU kernel binaries - confirm.
 */
static const uint32_t pp_null_gen5[][4] = {
#include "shaders/post_processing/gen5_6/null.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
};

static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
};

static const uint32_t pp_nv12_scaling_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_avs_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_dndi_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
};
87
/*
 * Forward declarations of the per-module initialize callbacks.  They are
 * needed here because the module tables that follow store pointers to them.
 * All callbacks share one signature: source surface + rectangle,
 * destination surface + rectangle, and an opaque filter parameter.
 */
static void pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                               const struct i965_surface *src_surface,
                               const VARectangle *src_rect,
                               const struct i965_surface *dst_surface,
                               const VARectangle *dst_rect,
                               void *filter_param);
static void pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                   const struct i965_surface *src_surface,
                                   const VARectangle *src_rect,
                                   const struct i965_surface *dst_surface,
                                   const VARectangle *dst_rect,
                                   void *filter_param);
static void pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                       const struct i965_surface *src_surface,
                                       const VARectangle *src_rect,
                                       const struct i965_surface *dst_surface,
                                       const VARectangle *dst_rect,
                                       void *filter_param);
static void pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                            const struct i965_surface *src_surface,
                                            const VARectangle *src_rect,
                                            const struct i965_surface *dst_surface,
                                            const VARectangle *dst_rect,
                                            void *filter_param);
static void pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                    const struct i965_surface *src_surface,
                                    const VARectangle *src_rect,
                                    const struct i965_surface *dst_surface,
                                    const VARectangle *dst_rect,
                                    void *filter_param);
118
119 static struct pp_module pp_modules_gen5[] = {
120     {
121         {
122             "NULL module (for testing)",
123             PP_NULL,
124             pp_null_gen5,
125             sizeof(pp_null_gen5),
126             NULL,
127         },
128
129         pp_null_initialize,
130     },
131
132     {
133         {
134             "NV12_NV12",
135             PP_NV12_LOAD_SAVE_N12,
136             pp_nv12_load_save_nv12_gen5,
137             sizeof(pp_nv12_load_save_nv12_gen5),
138             NULL,
139         },
140
141         pp_plx_load_save_plx_initialize,
142     },
143
144     {
145         {
146             "NV12_PL3",
147             PP_NV12_LOAD_SAVE_PL3,
148             pp_nv12_load_save_pl3_gen5,
149             sizeof(pp_nv12_load_save_pl3_gen5),
150             NULL,
151         },
152
153         pp_plx_load_save_plx_initialize,
154     },
155
156     {
157         {
158             "PL3_NV12",
159             PP_PL3_LOAD_SAVE_N12,
160             pp_pl3_load_save_nv12_gen5,
161             sizeof(pp_pl3_load_save_nv12_gen5),
162             NULL,
163         },
164
165         pp_plx_load_save_plx_initialize,
166     },
167
168     {
169         {
170             "PL3_PL3",
171             PP_PL3_LOAD_SAVE_N12,
172             pp_pl3_load_save_pl3_gen5,
173             sizeof(pp_pl3_load_save_pl3_gen5),
174             NULL,
175         },
176
177         pp_plx_load_save_plx_initialize
178     },
179
180     {
181         {
182             "NV12 Scaling module",
183             PP_NV12_SCALING,
184             pp_nv12_scaling_gen5,
185             sizeof(pp_nv12_scaling_gen5),
186             NULL,
187         },
188
189         pp_nv12_scaling_initialize,
190     },
191
192     {
193         {
194             "NV12 AVS module",
195             PP_NV12_AVS,
196             pp_nv12_avs_gen5,
197             sizeof(pp_nv12_avs_gen5),
198             NULL,
199         },
200
201         pp_nv12_avs_initialize,
202     },
203
204     {
205         {
206             "NV12 DNDI module",
207             PP_NV12_DNDI,
208             pp_nv12_dndi_gen5,
209             sizeof(pp_nv12_dndi_gen5),
210             NULL,
211         },
212
213         pp_nv12_dndi_initialize,
214     },
215 };
216
/*
 * Kernel tables for the gen6 module list.  Each array is a list of
 * four-dword rows whose contents are pulled in verbatim from a generated
 * file under shaders/post_processing/gen5_6/ (".g6b" variants).
 * NOTE(review): presumably pre-assembled GPU kernel binaries - confirm.
 */
static const uint32_t pp_null_gen6[][4] = {
#include "shaders/post_processing/gen5_6/null.g6b"
};

static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
};

static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
};

static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
};

static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
};

static const uint32_t pp_nv12_scaling_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g6b"
};

static const uint32_t pp_nv12_avs_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
};

static const uint32_t pp_nv12_dndi_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
};
248
249 static struct pp_module pp_modules_gen6[] = {
250     {
251         {
252             "NULL module (for testing)",
253             PP_NULL,
254             pp_null_gen6,
255             sizeof(pp_null_gen6),
256             NULL,
257         },
258
259         pp_null_initialize,
260     },
261
262     {
263         {
264             "NV12_NV12",
265             PP_NV12_LOAD_SAVE_N12,
266             pp_nv12_load_save_nv12_gen6,
267             sizeof(pp_nv12_load_save_nv12_gen6),
268             NULL,
269         },
270
271         pp_plx_load_save_plx_initialize,
272     },
273
274     {
275         {
276             "NV12_PL3",
277             PP_NV12_LOAD_SAVE_PL3,
278             pp_nv12_load_save_pl3_gen6,
279             sizeof(pp_nv12_load_save_pl3_gen6),
280             NULL,
281         },
282         
283         pp_plx_load_save_plx_initialize,
284     },
285
286     {
287         {
288             "PL3_NV12",
289             PP_PL3_LOAD_SAVE_N12,
290             pp_pl3_load_save_nv12_gen6,
291             sizeof(pp_pl3_load_save_nv12_gen6),
292             NULL,
293         },
294
295         pp_plx_load_save_plx_initialize,
296     },
297
298     {
299         {
300             "PL3_PL3",
301             PP_PL3_LOAD_SAVE_N12,
302             pp_pl3_load_save_pl3_gen6,
303             sizeof(pp_pl3_load_save_pl3_gen6),
304             NULL,
305         },
306
307         pp_plx_load_save_plx_initialize,
308     },
309
310     {
311         {
312             "NV12 Scaling module",
313             PP_NV12_SCALING,
314             pp_nv12_scaling_gen6,
315             sizeof(pp_nv12_scaling_gen6),
316             NULL,
317         },
318
319         pp_nv12_scaling_initialize,
320     },
321
322     {
323         {
324             "NV12 AVS module",
325             PP_NV12_AVS,
326             pp_nv12_avs_gen6,
327             sizeof(pp_nv12_avs_gen6),
328             NULL,
329         },
330
331         pp_nv12_avs_initialize,
332     },
333
334     {
335         {
336             "NV12 DNDI module",
337             PP_NV12_DNDI,
338             pp_nv12_dndi_gen6,
339             sizeof(pp_nv12_dndi_gen6),
340             NULL,
341         },
342
343         pp_nv12_dndi_initialize,
344     },
345 };
346
/*
 * Shorthands for the static and inline parameter blocks stored in the
 * post-processing context.  They expand to "pp_context->...", so they can
 * only be used where a variable named pp_context is in scope.
 */
#define pp_static_parameter     pp_context->pp_static_parameter
#define pp_inline_parameter     pp_context->pp_inline_parameter
349
350 static int
351 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
352 {
353     struct i965_driver_data *i965 = i965_driver_data(ctx);
354     int fourcc;
355
356     if (surface->flag == I965_SURFACE_IMAGE) {
357         struct object_image *obj_image = IMAGE(surface->id);
358         fourcc = obj_image->image.format.fourcc;
359     } else {
360         struct object_surface *obj_surface = SURFACE(surface->id);
361         fourcc = obj_surface->fourcc;
362     }
363
364     return fourcc;
365 }
366
367 static void
368 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
369 {
370     switch (tiling) {
371     case I915_TILING_NONE:
372         ss->ss3.tiled_surface = 0;
373         ss->ss3.tile_walk = 0;
374         break;
375     case I915_TILING_X:
376         ss->ss3.tiled_surface = 1;
377         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
378         break;
379     case I915_TILING_Y:
380         ss->ss3.tiled_surface = 1;
381         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
382         break;
383     }
384 }
385
386 static void
387 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
388 {
389     switch (tiling) {
390     case I915_TILING_NONE:
391         ss->ss2.tiled_surface = 0;
392         ss->ss2.tile_walk = 0;
393         break;
394     case I915_TILING_X:
395         ss->ss2.tiled_surface = 1;
396         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
397         break;
398     case I915_TILING_Y:
399         ss->ss2.tiled_surface = 1;
400         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
401         break;
402     }
403 }
404
405 static void
406 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
407 {
408     struct i965_interface_descriptor *desc;
409     dri_bo *bo;
410     int pp_index = pp_context->current_pp;
411
412     bo = pp_context->idrt.bo;
413     dri_bo_map(bo, 1);
414     assert(bo->virtual);
415     desc = bo->virtual;
416     memset(desc, 0, sizeof(*desc));
417     desc->desc0.grf_reg_blocks = 10;
418     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
419     desc->desc1.const_urb_entry_read_offset = 0;
420     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
421     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
422     desc->desc2.sampler_count = 0;
423     desc->desc3.binding_table_entry_count = 0;
424     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
425
426     dri_bo_emit_reloc(bo,
427                       I915_GEM_DOMAIN_INSTRUCTION, 0,
428                       desc->desc0.grf_reg_blocks,
429                       offsetof(struct i965_interface_descriptor, desc0),
430                       pp_context->pp_modules[pp_index].kernel.bo);
431
432     dri_bo_emit_reloc(bo,
433                       I915_GEM_DOMAIN_INSTRUCTION, 0,
434                       desc->desc2.sampler_count << 2,
435                       offsetof(struct i965_interface_descriptor, desc2),
436                       pp_context->sampler_state_table.bo);
437
438     dri_bo_unmap(bo);
439     pp_context->idrt.num_interface_descriptors++;
440 }
441
442 static void
443 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
444 {
445     struct i965_vfe_state *vfe_state;
446     dri_bo *bo;
447
448     bo = pp_context->vfe_state.bo;
449     dri_bo_map(bo, 1);
450     assert(bo->virtual);
451     vfe_state = bo->virtual;
452     memset(vfe_state, 0, sizeof(*vfe_state));
453     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
454     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
455     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
456     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
457     vfe_state->vfe1.children_present = 0;
458     vfe_state->vfe2.interface_descriptor_base = 
459         pp_context->idrt.bo->offset >> 4; /* reloc */
460     dri_bo_emit_reloc(bo,
461                       I915_GEM_DOMAIN_INSTRUCTION, 0,
462                       0,
463                       offsetof(struct i965_vfe_state, vfe2),
464                       pp_context->idrt.bo);
465     dri_bo_unmap(bo);
466 }
467
468 static void
469 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
470 {
471     unsigned char *constant_buffer;
472
473     assert(sizeof(pp_static_parameter) == 128);
474     dri_bo_map(pp_context->curbe.bo, 1);
475     assert(pp_context->curbe.bo->virtual);
476     constant_buffer = pp_context->curbe.bo->virtual;
477     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
478     dri_bo_unmap(pp_context->curbe.bo);
479 }
480
481 static void
482 ironlake_pp_states_setup(VADriverContextP ctx,
483                          struct i965_post_processing_context *pp_context)
484 {
485     ironlake_pp_interface_descriptor_table(pp_context);
486     ironlake_pp_vfe_state(pp_context);
487     ironlake_pp_upload_constants(pp_context);
488 }
489
490 static void
491 ironlake_pp_pipeline_select(VADriverContextP ctx,
492                             struct i965_post_processing_context *pp_context)
493 {
494     struct intel_batchbuffer *batch = pp_context->batch;
495
496     BEGIN_BATCH(batch, 1);
497     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
498     ADVANCE_BATCH(batch);
499 }
500
501 static void
502 ironlake_pp_urb_layout(VADriverContextP ctx,
503                        struct i965_post_processing_context *pp_context)
504 {
505     struct intel_batchbuffer *batch = pp_context->batch;
506     unsigned int vfe_fence, cs_fence;
507
508     vfe_fence = pp_context->urb.cs_start;
509     cs_fence = pp_context->urb.size;
510
511     BEGIN_BATCH(batch, 3);
512     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
513     OUT_BATCH(batch, 0);
514     OUT_BATCH(batch, 
515               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
516               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
517     ADVANCE_BATCH(batch);
518 }
519
520 static void
521 ironlake_pp_state_base_address(VADriverContextP ctx,
522                                struct i965_post_processing_context *pp_context)
523 {
524     struct intel_batchbuffer *batch = pp_context->batch;
525
526     BEGIN_BATCH(batch, 8);
527     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
528     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
529     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
530     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
531     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
532     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
533     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
534     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
535     ADVANCE_BATCH(batch);
536 }
537
538 static void
539 ironlake_pp_state_pointers(VADriverContextP ctx,
540                            struct i965_post_processing_context *pp_context)
541 {
542     struct intel_batchbuffer *batch = pp_context->batch;
543
544     BEGIN_BATCH(batch, 3);
545     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
546     OUT_BATCH(batch, 0);
547     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
548     ADVANCE_BATCH(batch);
549 }
550
551 static void 
552 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
553                           struct i965_post_processing_context *pp_context)
554 {
555     struct intel_batchbuffer *batch = pp_context->batch;
556
557     BEGIN_BATCH(batch, 2);
558     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
559     OUT_BATCH(batch,
560               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
561               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
562     ADVANCE_BATCH(batch);
563 }
564
565 static void
566 ironlake_pp_constant_buffer(VADriverContextP ctx,
567                             struct i965_post_processing_context *pp_context)
568 {
569     struct intel_batchbuffer *batch = pp_context->batch;
570
571     BEGIN_BATCH(batch, 2);
572     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
573     OUT_RELOC(batch, pp_context->curbe.bo,
574               I915_GEM_DOMAIN_INSTRUCTION, 0,
575               pp_context->urb.size_cs_entry - 1);
576     ADVANCE_BATCH(batch);    
577 }
578
579 static void
580 ironlake_pp_object_walker(VADriverContextP ctx,
581                           struct i965_post_processing_context *pp_context)
582 {
583     struct intel_batchbuffer *batch = pp_context->batch;
584     int x, x_steps, y, y_steps;
585
586     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
587     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
588
589     for (y = 0; y < y_steps; y++) {
590         for (x = 0; x < x_steps; x++) {
591             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
592                 BEGIN_BATCH(batch, 20);
593                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
594                 OUT_BATCH(batch, 0);
595                 OUT_BATCH(batch, 0); /* no indirect data */
596                 OUT_BATCH(batch, 0);
597
598                 /* inline data grf 5-6 */
599                 assert(sizeof(pp_inline_parameter) == 64);
600                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
601
602                 ADVANCE_BATCH(batch);
603             }
604         }
605     }
606 }
607
608 static void
609 ironlake_pp_pipeline_setup(VADriverContextP ctx,
610                            struct i965_post_processing_context *pp_context)
611 {
612     struct intel_batchbuffer *batch = pp_context->batch;
613
614     intel_batchbuffer_start_atomic(batch, 0x1000);
615     intel_batchbuffer_emit_mi_flush(batch);
616     ironlake_pp_pipeline_select(ctx, pp_context);
617     ironlake_pp_state_base_address(ctx, pp_context);
618     ironlake_pp_state_pointers(ctx, pp_context);
619     ironlake_pp_urb_layout(ctx, pp_context);
620     ironlake_pp_cs_urb_layout(ctx, pp_context);
621     ironlake_pp_constant_buffer(ctx, pp_context);
622     ironlake_pp_object_walker(ctx, pp_context);
623     intel_batchbuffer_end_atomic(batch);
624 }
625
626 static void
627 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
628                           dri_bo *surf_bo, unsigned long surf_bo_offset,
629                           int width, int height, int pitch, int format, 
630                           int index, int is_target)
631 {
632     struct i965_surface_state *ss;
633     dri_bo *ss_bo;
634     unsigned int tiling;
635     unsigned int swizzle;
636
637     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
638     ss_bo = pp_context->surface_state_binding_table.bo;
639     assert(ss_bo);
640
641     dri_bo_map(ss_bo, True);
642     assert(ss_bo->virtual);
643     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
644     memset(ss, 0, sizeof(*ss));
645     ss->ss0.surface_type = I965_SURFACE_2D;
646     ss->ss0.surface_format = format;
647     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
648     ss->ss2.width = width - 1;
649     ss->ss2.height = height - 1;
650     ss->ss3.pitch = pitch - 1;
651     pp_set_surface_tiling(ss, tiling);
652     dri_bo_emit_reloc(ss_bo,
653                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
654                       surf_bo_offset,
655                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
656                       surf_bo);
657     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
658     dri_bo_unmap(ss_bo);
659 }
660
661 static void
662 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
663                            dri_bo *surf_bo, unsigned long surf_bo_offset,
664                            int width, int height, int wpitch,
665                            int xoffset, int yoffset,
666                            int format, int interleave_chroma,
667                            int index)
668 {
669     struct i965_surface_state2 *ss2;
670     dri_bo *ss2_bo;
671     unsigned int tiling;
672     unsigned int swizzle;
673
674     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
675     ss2_bo = pp_context->surface_state_binding_table.bo;
676     assert(ss2_bo);
677
678     dri_bo_map(ss2_bo, True);
679     assert(ss2_bo->virtual);
680     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
681     memset(ss2, 0, sizeof(*ss2));
682     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
683     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
684     ss2->ss1.width = width - 1;
685     ss2->ss1.height = height - 1;
686     ss2->ss2.pitch = wpitch - 1;
687     ss2->ss2.interleave_chroma = interleave_chroma;
688     ss2->ss2.surface_format = format;
689     ss2->ss3.x_offset_for_cb = xoffset;
690     ss2->ss3.y_offset_for_cb = yoffset;
691     pp_set_surface2_tiling(ss2, tiling);
692     dri_bo_emit_reloc(ss2_bo,
693                       I915_GEM_DOMAIN_RENDER, 0,
694                       surf_bo_offset,
695                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
696                       surf_bo);
697     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
698     dri_bo_unmap(ss2_bo);
699 }
700
701 static void 
702 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
703                                 const struct i965_surface *surface, 
704                                 int base_index, int is_target,
705                                 int *width, int *height, int *pitch, int *offset)
706 {
707     struct i965_driver_data *i965 = i965_driver_data(ctx);
708     struct object_surface *obj_surface;
709     struct object_image *obj_image;
710     dri_bo *bo;
711     int fourcc = pp_get_surface_fourcc(ctx, surface);
712     const int Y = 0;
713     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
714     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
715     const int UV = 1;
716     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
717
718     if (surface->flag == I965_SURFACE_SURFACE) {
719         obj_surface = SURFACE(surface->id);
720         bo = obj_surface->bo;
721         width[0] = obj_surface->orig_width;
722         height[0] = obj_surface->orig_height;
723         pitch[0] = obj_surface->width;
724         offset[0] = 0;
725
726         if (interleaved_uv) {
727             width[1] = obj_surface->orig_width;
728             height[1] = obj_surface->orig_height / 2;
729             pitch[1] = obj_surface->width;
730             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
731         } else {
732             width[1] = obj_surface->orig_width / 2;
733             height[1] = obj_surface->orig_height / 2;
734             pitch[1] = obj_surface->width / 2;
735             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
736             width[2] = obj_surface->orig_width / 2;
737             height[2] = obj_surface->orig_height / 2;
738             pitch[2] = obj_surface->width / 2;
739             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
740         }
741     } else {
742         obj_image = IMAGE(surface->id);
743         bo = obj_image->bo;
744         width[0] = obj_image->image.width;
745         height[0] = obj_image->image.height;
746         pitch[0] = obj_image->image.pitches[0];
747         offset[0] = obj_image->image.offsets[0];
748
749         if (interleaved_uv) {
750             width[1] = obj_image->image.width;
751             height[1] = obj_image->image.height / 2;
752             pitch[1] = obj_image->image.pitches[1];
753             offset[1] = obj_image->image.offsets[1];
754         } else {
755             width[1] = obj_image->image.width / 2;
756             height[1] = obj_image->image.height / 2;
757             pitch[1] = obj_image->image.pitches[1];
758             offset[1] = obj_image->image.offsets[1];
759             width[2] = obj_image->image.width / 2;
760             height[2] = obj_image->image.height / 2;
761             pitch[2] = obj_image->image.pitches[2];
762             offset[2] = obj_image->image.offsets[2];
763         }
764     }
765
766     /* Y surface */
767     i965_pp_set_surface_state(ctx, pp_context,
768                               bo, offset[Y],
769                               width[Y] / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
770                               base_index, is_target);
771
772     if (interleaved_uv) {
773         i965_pp_set_surface_state(ctx, pp_context,
774                                   bo, offset[UV],
775                                   width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
776                                   base_index + 1, is_target);
777     } else {
778         /* U surface */
779         i965_pp_set_surface_state(ctx, pp_context,
780                                   bo, offset[U],
781                                   width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
782                                   base_index + 1, is_target);
783
784         /* V surface */
785         i965_pp_set_surface_state(ctx, pp_context,
786                                   bo, offset[V],
787                                   width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
788                                   base_index + 2, is_target);
789     }
790
791 }
792
/* Horizontal step count of the NULL module: always a single step. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
798
/* Vertical step count of the NULL module: always a single step. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
804
/*
 * Per-block callback of the NULL module: nothing to set up.
 * Always returns 0, which the object walker treats as "emit this block".
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
810
811 static void
812 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
813                    const struct i965_surface *src_surface,
814                    const VARectangle *src_rect,
815                    const struct i965_surface *dst_surface,
816                    const VARectangle *dst_rect,
817                    void *filter_param)
818 {
819     /* private function & data */
820     pp_context->pp_x_steps = pp_null_x_steps;
821     pp_context->pp_y_steps = pp_null_y_steps;
822     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
823 }
824
/* Horizontal step count of the load/save modules: always a single step. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
830
831 static int
832 pp_load_save_y_steps(void *private_context)
833 {
834     struct pp_load_save_context *pp_load_save_context = private_context;
835
836     return pp_load_save_context->dest_h / 8;
837 }
838
839 static int
840 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
841 {
842     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
843     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
844     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
845     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
846
847     return 0;
848 }
849
850 static void
851 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
852                                 const struct i965_surface *src_surface,
853                                 const VARectangle *src_rect,
854                                 const struct i965_surface *dst_surface,
855                                 const VARectangle *dst_rect,
856                                 void *filter_param)
857 {
858     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
859     int width[3], height[3], pitch[3], offset[3];
860     const int Y = 0;
861
862     /* source surface */
863     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
864                                     width, height, pitch, offset);
865
866     /* destination surface */
867     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
868                                     width, height, pitch, offset);
869
870     /* private function & data */
871     pp_context->pp_x_steps = pp_load_save_x_steps;
872     pp_context->pp_y_steps = pp_load_save_y_steps;
873     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
874     pp_load_save_context->dest_h = ALIGN(height[Y], 16);
875     pp_load_save_context->dest_w = ALIGN(width[Y], 16);
876
877     pp_inline_parameter.grf5.block_count_x = ALIGN(width[Y], 16) / 16;   /* 1 x N */
878     pp_inline_parameter.grf5.number_blocks = ALIGN(width[Y], 16) / 16;
879 }
880
/* Horizontal step count for the scaling module: always one step. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;      /* not needed to compute the result */

    return 1;
}
886
887 static int
888 pp_scaling_y_steps(void *private_context)
889 {
890     struct pp_scaling_context *pp_scaling_context = private_context;
891
892     return pp_scaling_context->dest_h / 8;
893 }
894
895 static int
896 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
897 {
898     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
899     float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
900     float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
901
902     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
903     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
904     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
905     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
906     
907     return 0;
908 }
909
910 static void
911 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
912                            const struct i965_surface *src_surface,
913                            const VARectangle *src_rect,
914                            const struct i965_surface *dst_surface,
915                            const VARectangle *dst_rect,
916                            void *filter_param)
917 {
918     struct i965_driver_data *i965 = i965_driver_data(ctx);
919     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
920     struct object_surface *obj_surface;
921     struct i965_sampler_state *sampler_state;
922     int in_w, in_h, in_wpitch, in_hpitch;
923     int out_w, out_h, out_wpitch, out_hpitch;
924
925     /* source surface */
926     obj_surface = SURFACE(src_surface->id);
927     in_w = obj_surface->orig_width;
928     in_h = obj_surface->orig_height;
929     in_wpitch = obj_surface->width;
930     in_hpitch = obj_surface->height;
931
932     /* source Y surface index 1 */
933     i965_pp_set_surface_state(ctx, pp_context,
934                               obj_surface->bo, 0,
935                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
936                               1, 0);
937
938     /* source UV surface index 2 */
939     i965_pp_set_surface_state(ctx, pp_context,
940                               obj_surface->bo, in_wpitch * in_hpitch,
941                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
942                               2, 0);
943
944     /* destination surface */
945     obj_surface = SURFACE(dst_surface->id);
946     out_w = obj_surface->orig_width;
947     out_h = obj_surface->orig_height;
948     out_wpitch = obj_surface->width;
949     out_hpitch = obj_surface->height;
950
951     /* destination Y surface index 7 */
952     i965_pp_set_surface_state(ctx, pp_context,
953                               obj_surface->bo, 0,
954                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
955                               7, 1);
956
957     /* destination UV surface index 8 */
958     i965_pp_set_surface_state(ctx, pp_context,
959                               obj_surface->bo, out_wpitch * out_hpitch,
960                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
961                               8, 1);
962
963     /* sampler state */
964     dri_bo_map(pp_context->sampler_state_table.bo, True);
965     assert(pp_context->sampler_state_table.bo->virtual);
966     sampler_state = pp_context->sampler_state_table.bo->virtual;
967
968     /* SIMD16 Y index 1 */
969     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
970     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
971     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
972     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
973     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
974
975     /* SIMD16 UV index 2 */
976     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
977     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
978     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
979     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
980     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
981
982     dri_bo_unmap(pp_context->sampler_state_table.bo);
983
984     /* private function & data */
985     pp_context->pp_x_steps = pp_scaling_x_steps;
986     pp_context->pp_y_steps = pp_scaling_y_steps;
987     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
988
989     pp_scaling_context->dest_x = dst_rect->x;
990     pp_scaling_context->dest_y = dst_rect->y;
991     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
992     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
993     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
994     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
995
996     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
997
998     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
999     pp_inline_parameter.grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1000     pp_inline_parameter.grf5.number_blocks = pp_scaling_context->dest_w / 16;
1001     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1002     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1003 }
1004
1005 static int
1006 pp_avs_x_steps(void *private_context)
1007 {
1008     struct pp_avs_context *pp_avs_context = private_context;
1009
1010     return pp_avs_context->dest_w / 16;
1011 }
1012
/* Vertical step count for the AVS module: always one step. */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;      /* not needed to compute the result */

    return 1;
}
1018
1019 static int
1020 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1021 {
1022     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1023     float src_x_steping, src_y_steping, video_step_delta;
1024     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1025
1026     if (tmp_w >= pp_avs_context->dest_w) {
1027         pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1028         pp_inline_parameter.grf6.video_step_delta = 0;
1029         
1030         if (x == 0) {
1031             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1032                 pp_avs_context->src_normalized_x;
1033         } else {
1034             src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1035             video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1036             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1037                 16 * 15 * video_step_delta / 2;
1038         }
1039     } else {
1040         int n0, n1, n2, nls_left, nls_right;
1041         int factor_a = 5, factor_b = 4;
1042         float f;
1043
1044         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1045         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1046         n2 = tmp_w / (16 * factor_a);
1047         nls_left = n0 + n2;
1048         nls_right = n1 + n2;
1049         f = (float) n2 * 16 / tmp_w;
1050         
1051         if (n0 < 5) {
1052             pp_inline_parameter.grf6.video_step_delta = 0.0;
1053
1054             if (x == 0) {
1055                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1056                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1057             } else {
1058                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1059                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1060                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1061                     16 * 15 * video_step_delta / 2;
1062             }
1063         } else {
1064             if (x < nls_left) {
1065                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1066                 float a = f / (nls_left * 16 * factor_b);
1067                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1068                 
1069                 pp_inline_parameter.grf6.video_step_delta = b;
1070
1071                 if (x == 0) {
1072                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1073                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
1074                 } else {
1075                     src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1076                     video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1077                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1078                         16 * 15 * video_step_delta / 2;
1079                     pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
1080                 }
1081             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1082                 /* scale the center linearly */
1083                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1084                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1085                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1086                     16 * 15 * video_step_delta / 2;
1087                 pp_inline_parameter.grf6.video_step_delta = 0.0;
1088                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1089             } else {
1090                 float a = f / (nls_right * 16 * factor_b);
1091                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1092
1093                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1094                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1095                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1096                     16 * 15 * video_step_delta / 2;
1097                 pp_inline_parameter.grf6.video_step_delta = -b;
1098
1099                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1100                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1101                 else
1102                     pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
1103             }
1104         }
1105     }
1106
1107     src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
1108     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
1109     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1110     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
1111
1112     return 0;
1113 }
1114
1115 static void
1116 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1117                        const struct i965_surface *src_surface,
1118                        const VARectangle *src_rect,
1119                        const struct i965_surface *dst_surface,
1120                        const VARectangle *dst_rect,
1121                        void *filter_param)
1122 {
1123     struct i965_driver_data *i965 = i965_driver_data(ctx);
1124     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1125     struct object_surface *obj_surface;
1126     struct i965_sampler_8x8 *sampler_8x8;
1127     struct i965_sampler_8x8_state *sampler_8x8_state;
1128     int index;
1129     int in_w, in_h, in_wpitch, in_hpitch;
1130     int out_w, out_h, out_wpitch, out_hpitch;
1131
1132     /* surface */
1133     obj_surface = SURFACE(src_surface->id);
1134     in_w = obj_surface->orig_width;
1135     in_h = obj_surface->orig_height;
1136     in_wpitch = obj_surface->width;
1137     in_hpitch = obj_surface->height;
1138
1139     /* source Y surface index 1 */
1140     i965_pp_set_surface2_state(ctx, pp_context,
1141                                obj_surface->bo, 0,
1142                                in_w, in_h, in_wpitch,
1143                                0, 0,
1144                                SURFACE_FORMAT_Y8_UNORM, 0,
1145                                1);
1146
1147     /* source UV surface index 2 */
1148     i965_pp_set_surface2_state(ctx, pp_context,
1149                                obj_surface->bo, in_wpitch * in_hpitch,
1150                                in_w, in_h, in_wpitch,
1151                                0, 0,
1152                                SURFACE_FORMAT_PLANAR_420_8, 1,
1153                                2);
1154
1155     /* destination surface */
1156     obj_surface = SURFACE(dst_surface->id);
1157     out_w = obj_surface->orig_width;
1158     out_h = obj_surface->orig_height;
1159     out_wpitch = obj_surface->width;
1160     out_hpitch = obj_surface->height;
1161     assert(out_w <= out_wpitch && out_h <= out_hpitch);
1162
1163     /* destination Y surface index 7 */
1164     i965_pp_set_surface_state(ctx, pp_context,
1165                               obj_surface->bo, 0,
1166                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1167                               7, 1);
1168
1169     /* destination UV surface index 8 */
1170     i965_pp_set_surface_state(ctx, pp_context,
1171                               obj_surface->bo, out_wpitch * out_hpitch,
1172                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1173                               8, 1);
1174
1175     /* sampler 8x8 state */
1176     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1177     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1178     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1179     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1180     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1181     sampler_8x8_state->dw136.default_sharpness_level = 0;
1182     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1183     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1184     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1185     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1186
1187     /* sampler 8x8 */
1188     dri_bo_map(pp_context->sampler_state_table.bo, True);
1189     assert(pp_context->sampler_state_table.bo->virtual);
1190     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1191     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1192
1193     /* sample_8x8 Y index 1 */
1194     index = 1;
1195     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1196     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1197     sampler_8x8[index].dw0.ief_bypass = 0;
1198     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1199     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1200     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1201     sampler_8x8[index].dw2.global_noise_estimation = 22;
1202     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1203     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1204     sampler_8x8[index].dw3.strong_edge_weight = 7;
1205     sampler_8x8[index].dw3.regular_weight = 2;
1206     sampler_8x8[index].dw3.non_edge_weight = 0;
1207     sampler_8x8[index].dw3.gain_factor = 40;
1208     sampler_8x8[index].dw4.steepness_boost = 0;
1209     sampler_8x8[index].dw4.steepness_threshold = 0;
1210     sampler_8x8[index].dw4.mr_boost = 0;
1211     sampler_8x8[index].dw4.mr_threshold = 5;
1212     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1213     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1214     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1215     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1216     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1217     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1218     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1219     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1220     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1221     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1222     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1223     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1224     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1225     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1226     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1227     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1228     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1229     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1230     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1231     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1232     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1233     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1234     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1235     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1236     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1237     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1238     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1239     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1240     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1241     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1242     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1243     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1244     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1245     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1246     sampler_8x8[index].dw13.limiter_boost = 0;
1247     sampler_8x8[index].dw13.minimum_limiter = 10;
1248     sampler_8x8[index].dw13.maximum_limiter = 11;
1249     sampler_8x8[index].dw14.clip_limiter = 130;
1250     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1251                       I915_GEM_DOMAIN_RENDER, 
1252                       0,
1253                       0,
1254                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1255                       pp_context->sampler_state_table.bo_8x8);
1256
1257     dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
1258     assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
1259     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1260     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
1261     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1262     sampler_8x8_state->dw136.default_sharpness_level = 0;
1263     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
1264     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1265     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1266     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
1267
1268     /* sample_8x8 UV index 2 */
1269     index = 2;
1270     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1271     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
1272     sampler_8x8[index].dw0.ief_bypass = 0;
1273     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1274     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1275     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
1276     sampler_8x8[index].dw2.global_noise_estimation = 22;
1277     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1278     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1279     sampler_8x8[index].dw3.strong_edge_weight = 7;
1280     sampler_8x8[index].dw3.regular_weight = 2;
1281     sampler_8x8[index].dw3.non_edge_weight = 0;
1282     sampler_8x8[index].dw3.gain_factor = 40;
1283     sampler_8x8[index].dw4.steepness_boost = 0;
1284     sampler_8x8[index].dw4.steepness_threshold = 0;
1285     sampler_8x8[index].dw4.mr_boost = 0;
1286     sampler_8x8[index].dw4.mr_threshold = 5;
1287     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1288     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1289     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1290     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1291     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1292     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1293     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1294     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1295     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1296     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1297     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1298     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1299     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1300     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1301     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1302     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1303     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1304     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1305     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1306     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1307     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1308     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1309     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1310     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1311     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1312     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1313     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1314     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1315     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1316     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1317     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1318     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1319     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1320     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1321     sampler_8x8[index].dw13.limiter_boost = 0;
1322     sampler_8x8[index].dw13.minimum_limiter = 10;
1323     sampler_8x8[index].dw13.maximum_limiter = 11;
1324     sampler_8x8[index].dw14.clip_limiter = 130;
1325     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1326                       I915_GEM_DOMAIN_RENDER, 
1327                       0,
1328                       0,
1329                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1330                       pp_context->sampler_state_table.bo_8x8_uv);
1331
1332     dri_bo_unmap(pp_context->sampler_state_table.bo);
1333
1334     /* private function & data */
1335     pp_context->pp_x_steps = pp_avs_x_steps;
1336     pp_context->pp_y_steps = pp_avs_y_steps;
1337     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1338
1339     pp_avs_context->dest_x = dst_rect->x;
1340     pp_avs_context->dest_y = dst_rect->y;
1341     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
1342     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
1343     pp_avs_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
1344     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
1345     pp_avs_context->src_w = src_rect->width;
1346     pp_avs_context->src_h = src_rect->height;
1347
1348     pp_static_parameter.grf4.r4_2.avs.nlas = 1;
1349     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
1350
1351     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
1352     pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
1353     pp_inline_parameter.grf5.number_blocks = pp_avs_context->dest_h / 8;
1354     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1355     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1356     pp_inline_parameter.grf6.video_step_delta = 0.0;
1357 }
1358
/* Horizontal step count for the DN/DI module: always one step. */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;      /* not needed to compute the result */

    return 1;
}
1364
1365 static int
1366 pp_dndi_y_steps(void *private_context)
1367 {
1368     struct pp_dndi_context *pp_dndi_context = private_context;
1369
1370     return pp_dndi_context->dest_h / 4;
1371 }
1372
1373 static int
1374 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1375 {
1376     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1377     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1378
1379     return 0;
1380 }
1381
1382 static 
1383 void pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1384                              const struct i965_surface *src_surface,
1385                              const VARectangle *src_rect,
1386                              const struct i965_surface *dst_surface,
1387                              const VARectangle *dst_rect,
1388                              void *filter_param)
1389 {
1390     struct i965_driver_data *i965 = i965_driver_data(ctx);
1391     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1392     struct object_surface *obj_surface;
1393     struct i965_sampler_dndi *sampler_dndi;
1394     int index;
1395     int w, h;
1396     int orig_w, orig_h;
1397
1398     /* surface */
1399     obj_surface = SURFACE(src_surface->id);
1400     orig_w = obj_surface->orig_width;
1401     orig_h = obj_surface->orig_height;
1402     w = obj_surface->width;
1403     h = obj_surface->height;
1404
1405     if (pp_context->stmm.bo == NULL) {
1406         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1407                                            "STMM surface",
1408                                            w * h,
1409                                            4096);
1410         assert(pp_context->stmm.bo);
1411     }
1412
1413     /* source UV surface index 2 */
1414     i965_pp_set_surface_state(ctx, pp_context,
1415                               obj_surface->bo, w * h,
1416                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
1417                               2, 0);
1418
1419     /* source YUV surface index 4 */
1420     i965_pp_set_surface2_state(ctx, pp_context,
1421                                obj_surface->bo, 0,
1422                                orig_w, orig_w, w,
1423                                0, h,
1424                                SURFACE_FORMAT_PLANAR_420_8, 1,
1425                                4);
1426
1427     /* source STMM surface index 20 */
1428     i965_pp_set_surface_state(ctx, pp_context,
1429                               pp_context->stmm.bo, 0,
1430                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
1431                               20, 1);
1432
1433     /* destination surface */
1434     obj_surface = SURFACE(dst_surface->id);
1435     orig_w = obj_surface->orig_width;
1436     orig_h = obj_surface->orig_height;
1437     w = obj_surface->width;
1438     h = obj_surface->height;
1439
1440     /* destination Y surface index 7 */
1441     i965_pp_set_surface_state(ctx, pp_context,
1442                               obj_surface->bo, 0,
1443                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
1444                               7, 1);
1445
1446     /* destination UV surface index 8 */
1447     i965_pp_set_surface_state(ctx, pp_context,
1448                               obj_surface->bo, w * h,
1449                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
1450                               8, 1);
1451     /* sampler dndi */
1452     dri_bo_map(pp_context->sampler_state_table.bo, True);
1453     assert(pp_context->sampler_state_table.bo->virtual);
1454     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1455     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1456
1457     /* sample dndi index 1 */
1458     index = 0;
1459     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1460     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1461     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1462     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1463
1464     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1465     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1466     sampler_dndi[index].dw1.stmm_c2 = 0;
1467     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1468     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1469
1470     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
1471     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1472     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1473     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1474
1475     sampler_dndi[index].dw3.maximum_stmm = 128;
1476     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1477     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1478     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1479     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1480
1481     sampler_dndi[index].dw4.sdi_delta = 8;
1482     sampler_dndi[index].dw4.sdi_threshold = 128;
1483     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1484     sampler_dndi[index].dw4.stmm_shift_up = 0;
1485     sampler_dndi[index].dw4.stmm_shift_down = 0;
1486     sampler_dndi[index].dw4.minimum_stmm = 0;
1487
1488     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1489     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1490     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1491     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1492
1493     sampler_dndi[index].dw6.dn_enable = 1;
1494     sampler_dndi[index].dw6.di_enable = 1;
1495     sampler_dndi[index].dw6.di_partial = 0;
1496     sampler_dndi[index].dw6.dndi_top_first = 1;
1497     sampler_dndi[index].dw6.dndi_stream_id = 1;
1498     sampler_dndi[index].dw6.dndi_first_frame = 1;
1499     sampler_dndi[index].dw6.progressive_dn = 0;
1500     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1501     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1502     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1503
1504     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1505     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1506     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1507     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1508
1509     dri_bo_unmap(pp_context->sampler_state_table.bo);
1510
1511     /* private function & data */
1512     pp_context->pp_x_steps = pp_dndi_x_steps;
1513     pp_context->pp_y_steps = pp_dndi_y_steps;
1514     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1515
1516     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1517     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1518     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1519     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1520
1521     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1522     pp_inline_parameter.grf5.number_blocks = w / 16;
1523     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1524     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1525
1526     pp_dndi_context->dest_w = w;
1527     pp_dndi_context->dest_h = h;
1528 }
1529
1530 static void
1531 ironlake_pp_initialize(
1532     VADriverContextP   ctx,
1533     struct i965_post_processing_context *pp_context,
1534     const struct i965_surface *src_surface,
1535     const VARectangle *src_rect,
1536     const struct i965_surface *dst_surface,
1537     const VARectangle *dst_rect,
1538     int                pp_index,
1539     void *filter_param
1540 )
1541 {
1542     struct i965_driver_data *i965 = i965_driver_data(ctx);
1543     struct pp_module *pp_module;
1544     dri_bo *bo;
1545
1546     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
1547     bo = dri_bo_alloc(i965->intel.bufmgr,
1548                       "surface state & binding table",
1549                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
1550                       4096);
1551     assert(bo);
1552     pp_context->surface_state_binding_table.bo = bo;
1553
1554     dri_bo_unreference(pp_context->curbe.bo);
1555     bo = dri_bo_alloc(i965->intel.bufmgr,
1556                       "constant buffer",
1557                       4096, 
1558                       4096);
1559     assert(bo);
1560     pp_context->curbe.bo = bo;
1561
1562     dri_bo_unreference(pp_context->idrt.bo);
1563     bo = dri_bo_alloc(i965->intel.bufmgr, 
1564                       "interface discriptor", 
1565                       sizeof(struct i965_interface_descriptor), 
1566                       4096);
1567     assert(bo);
1568     pp_context->idrt.bo = bo;
1569     pp_context->idrt.num_interface_descriptors = 0;
1570
1571     dri_bo_unreference(pp_context->sampler_state_table.bo);
1572     bo = dri_bo_alloc(i965->intel.bufmgr, 
1573                       "sampler state table", 
1574                       4096,
1575                       4096);
1576     assert(bo);
1577     dri_bo_map(bo, True);
1578     memset(bo->virtual, 0, bo->size);
1579     dri_bo_unmap(bo);
1580     pp_context->sampler_state_table.bo = bo;
1581
1582     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1583     bo = dri_bo_alloc(i965->intel.bufmgr, 
1584                       "sampler 8x8 state ",
1585                       4096,
1586                       4096);
1587     assert(bo);
1588     pp_context->sampler_state_table.bo_8x8 = bo;
1589
1590     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1591     bo = dri_bo_alloc(i965->intel.bufmgr, 
1592                       "sampler 8x8 state ",
1593                       4096,
1594                       4096);
1595     assert(bo);
1596     pp_context->sampler_state_table.bo_8x8_uv = bo;
1597
1598     dri_bo_unreference(pp_context->vfe_state.bo);
1599     bo = dri_bo_alloc(i965->intel.bufmgr, 
1600                       "vfe state", 
1601                       sizeof(struct i965_vfe_state), 
1602                       4096);
1603     assert(bo);
1604     pp_context->vfe_state.bo = bo;
1605     
1606     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1607     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1608     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1609     pp_context->current_pp = pp_index;
1610     pp_module = &pp_context->pp_modules[pp_index];
1611     
1612     if (pp_module->initialize)
1613         pp_module->initialize(ctx, pp_context,
1614                               src_surface,
1615                               src_rect,
1616                               dst_surface,
1617                               dst_rect,
1618                               filter_param);
1619 }
1620
1621 static void
1622 ironlake_post_processing(
1623     VADriverContextP   ctx,
1624     struct i965_post_processing_context *pp_context,
1625     const struct i965_surface *src_surface,
1626     const VARectangle *src_rect,
1627     const struct i965_surface *dst_surface,
1628     const VARectangle *dst_rect,
1629     int                pp_index,
1630     void *filter_param
1631 )
1632 {
1633     ironlake_pp_initialize(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
1634     ironlake_pp_states_setup(ctx, pp_context);
1635     ironlake_pp_pipeline_setup(ctx, pp_context);
1636 }
1637
1638 static void
1639 gen6_pp_initialize(
1640     VADriverContextP   ctx,
1641     struct i965_post_processing_context *pp_context,
1642     const struct i965_surface *src_surface,
1643     const VARectangle *src_rect,
1644     const struct i965_surface *dst_surface,
1645     const VARectangle *dst_rect,
1646     int                pp_index,
1647     void *filter_param
1648 )
1649 {
1650     struct i965_driver_data *i965 = i965_driver_data(ctx);
1651     struct pp_module *pp_module;
1652     dri_bo *bo;
1653
1654     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
1655     bo = dri_bo_alloc(i965->intel.bufmgr,
1656                       "surface state & binding table",
1657                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
1658                       4096);
1659     assert(bo);
1660     pp_context->surface_state_binding_table.bo = bo;
1661
1662     dri_bo_unreference(pp_context->curbe.bo);
1663     bo = dri_bo_alloc(i965->intel.bufmgr,
1664                       "constant buffer",
1665                       4096, 
1666                       4096);
1667     assert(bo);
1668     pp_context->curbe.bo = bo;
1669
1670     dri_bo_unreference(pp_context->idrt.bo);
1671     bo = dri_bo_alloc(i965->intel.bufmgr, 
1672                       "interface discriptor", 
1673                       sizeof(struct gen6_interface_descriptor_data), 
1674                       4096);
1675     assert(bo);
1676     pp_context->idrt.bo = bo;
1677     pp_context->idrt.num_interface_descriptors = 0;
1678
1679     dri_bo_unreference(pp_context->sampler_state_table.bo);
1680     bo = dri_bo_alloc(i965->intel.bufmgr, 
1681                       "sampler state table", 
1682                       4096,
1683                       4096);
1684     assert(bo);
1685     dri_bo_map(bo, True);
1686     memset(bo->virtual, 0, bo->size);
1687     dri_bo_unmap(bo);
1688     pp_context->sampler_state_table.bo = bo;
1689
1690     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1691     bo = dri_bo_alloc(i965->intel.bufmgr, 
1692                       "sampler 8x8 state ",
1693                       4096,
1694                       4096);
1695     assert(bo);
1696     pp_context->sampler_state_table.bo_8x8 = bo;
1697
1698     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1699     bo = dri_bo_alloc(i965->intel.bufmgr, 
1700                       "sampler 8x8 state ",
1701                       4096,
1702                       4096);
1703     assert(bo);
1704     pp_context->sampler_state_table.bo_8x8_uv = bo;
1705
1706     dri_bo_unreference(pp_context->vfe_state.bo);
1707     bo = dri_bo_alloc(i965->intel.bufmgr, 
1708                       "vfe state", 
1709                       sizeof(struct i965_vfe_state), 
1710                       4096);
1711     assert(bo);
1712     pp_context->vfe_state.bo = bo;
1713     
1714     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1715     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1716     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1717     pp_context->current_pp = pp_index;
1718     pp_module = &pp_context->pp_modules[pp_index];
1719     
1720     if (pp_module->initialize)
1721         pp_module->initialize(ctx, pp_context,
1722                               src_surface,
1723                               src_rect,
1724                               dst_surface,
1725                               dst_rect,
1726                               filter_param);
1727 }
1728
1729 static void
1730 gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1731 {
1732     struct gen6_interface_descriptor_data *desc;
1733     dri_bo *bo;
1734     int pp_index = pp_context->current_pp;
1735
1736     bo = pp_context->idrt.bo;
1737     dri_bo_map(bo, True);
1738     assert(bo->virtual);
1739     desc = bo->virtual;
1740     memset(desc, 0, sizeof(*desc));
1741     desc->desc0.kernel_start_pointer = 
1742         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1743     desc->desc1.single_program_flow = 1;
1744     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
1745     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
1746     desc->desc2.sampler_state_pointer = 
1747         pp_context->sampler_state_table.bo->offset >> 5;
1748     desc->desc3.binding_table_entry_count = 0;
1749     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1750     desc->desc4.constant_urb_entry_read_offset = 0;
1751     desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
1752
1753     dri_bo_emit_reloc(bo,
1754                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1755                       0,
1756                       offsetof(struct gen6_interface_descriptor_data, desc0),
1757                       pp_context->pp_modules[pp_index].kernel.bo);
1758
1759     dri_bo_emit_reloc(bo,
1760                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1761                       desc->desc2.sampler_count << 2,
1762                       offsetof(struct gen6_interface_descriptor_data, desc2),
1763                       pp_context->sampler_state_table.bo);
1764
1765     dri_bo_unmap(bo);
1766     pp_context->idrt.num_interface_descriptors++;
1767 }
1768
1769 static void
1770 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
1771 {
1772     unsigned char *constant_buffer;
1773
1774     assert(sizeof(pp_static_parameter) == 128);
1775     dri_bo_map(pp_context->curbe.bo, 1);
1776     assert(pp_context->curbe.bo->virtual);
1777     constant_buffer = pp_context->curbe.bo->virtual;
1778     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
1779     dri_bo_unmap(pp_context->curbe.bo);
1780 }
1781
1782 static void
1783 gen6_pp_states_setup(VADriverContextP ctx,
1784                      struct i965_post_processing_context *pp_context)
1785 {
1786     gen6_pp_interface_descriptor_table(pp_context);
1787     gen6_pp_upload_constants(pp_context);
1788 }
1789
1790 static void
1791 gen6_pp_pipeline_select(VADriverContextP ctx,
1792                         struct i965_post_processing_context *pp_context)
1793 {
1794     struct intel_batchbuffer *batch = pp_context->batch;
1795
1796     BEGIN_BATCH(batch, 1);
1797     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1798     ADVANCE_BATCH(batch);
1799 }
1800
1801 static void
1802 gen6_pp_state_base_address(VADriverContextP ctx,
1803                            struct i965_post_processing_context *pp_context)
1804 {
1805     struct intel_batchbuffer *batch = pp_context->batch;
1806
1807     BEGIN_BATCH(batch, 10);
1808     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1809     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1810     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1811     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1812     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1813     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1814     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1815     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1816     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1817     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1818     ADVANCE_BATCH(batch);
1819 }
1820
1821 static void
1822 gen6_pp_vfe_state(VADriverContextP ctx,
1823                   struct i965_post_processing_context *pp_context)
1824 {
1825     struct intel_batchbuffer *batch = pp_context->batch;
1826
1827     BEGIN_BATCH(batch, 8);
1828     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
1829     OUT_BATCH(batch, 0);
1830     OUT_BATCH(batch,
1831               (pp_context->urb.num_vfe_entries - 1) << 16 |
1832               pp_context->urb.num_vfe_entries << 8);
1833     OUT_BATCH(batch, 0);
1834     OUT_BATCH(batch,
1835               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
1836               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
1837     OUT_BATCH(batch, 0);
1838     OUT_BATCH(batch, 0);
1839     OUT_BATCH(batch, 0);
1840     ADVANCE_BATCH(batch);
1841 }
1842
1843 static void
1844 gen6_pp_curbe_load(VADriverContextP ctx,
1845                    struct i965_post_processing_context *pp_context)
1846 {
1847     struct intel_batchbuffer *batch = pp_context->batch;
1848
1849     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);
1850
1851     BEGIN_BATCH(batch, 4);
1852     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
1853     OUT_BATCH(batch, 0);
1854     OUT_BATCH(batch,
1855               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
1856     OUT_RELOC(batch, 
1857               pp_context->curbe.bo,
1858               I915_GEM_DOMAIN_INSTRUCTION, 0,
1859               0);
1860     ADVANCE_BATCH(batch);
1861 }
1862
1863 static void
1864 gen6_interface_descriptor_load(VADriverContextP ctx,
1865                                struct i965_post_processing_context *pp_context)
1866 {
1867     struct intel_batchbuffer *batch = pp_context->batch;
1868
1869     BEGIN_BATCH(batch, 4);
1870     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
1871     OUT_BATCH(batch, 0);
1872     OUT_BATCH(batch,
1873               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
1874     OUT_RELOC(batch, 
1875               pp_context->idrt.bo,
1876               I915_GEM_DOMAIN_INSTRUCTION, 0,
1877               0);
1878     ADVANCE_BATCH(batch);
1879 }
1880
1881 static void
1882 gen6_pp_object_walker(VADriverContextP ctx,
1883                       struct i965_post_processing_context *pp_context)
1884 {
1885     struct intel_batchbuffer *batch = pp_context->batch;
1886     int x, x_steps, y, y_steps;
1887
1888     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1889     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1890
1891     for (y = 0; y < y_steps; y++) {
1892         for (x = 0; x < x_steps; x++) {
1893             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1894                 BEGIN_BATCH(batch, 22);
1895                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20);
1896                 OUT_BATCH(batch, 0);
1897                 OUT_BATCH(batch, 0); /* no indirect data */
1898                 OUT_BATCH(batch, 0);
1899                 OUT_BATCH(batch, 0); /* scoreboard */
1900                 OUT_BATCH(batch, 0);
1901
1902                 /* inline data grf 5-6 */
1903                 assert(sizeof(pp_inline_parameter) == 64);
1904                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
1905
1906                 ADVANCE_BATCH(batch);
1907             }
1908         }
1909     }
1910 }
1911
1912 static void
1913 gen6_pp_pipeline_setup(VADriverContextP ctx,
1914                        struct i965_post_processing_context *pp_context)
1915 {
1916     struct intel_batchbuffer *batch = pp_context->batch;
1917
1918     intel_batchbuffer_start_atomic(batch, 0x1000);
1919     intel_batchbuffer_emit_mi_flush(batch);
1920     gen6_pp_pipeline_select(ctx, pp_context);
1921     gen6_pp_state_base_address(ctx, pp_context);
1922     gen6_pp_curbe_load(ctx, pp_context);
1923     gen6_interface_descriptor_load(ctx, pp_context);
1924     gen6_pp_vfe_state(ctx, pp_context);
1925     gen6_pp_object_walker(ctx, pp_context);
1926     intel_batchbuffer_end_atomic(batch);
1927 }
1928
1929 static void
1930 gen6_post_processing(
1931     VADriverContextP   ctx,
1932     struct i965_post_processing_context *pp_context,
1933     const struct i965_surface *src_surface,
1934     const VARectangle *src_rect,
1935     const struct i965_surface *dst_surface,
1936     const VARectangle *dst_rect,
1937     int                pp_index,
1938     void * filter_param
1939 )
1940 {
1941     gen6_pp_initialize(ctx, pp_context,
1942                        src_surface,
1943                        src_rect,
1944                        dst_surface,
1945                        dst_rect,
1946                        pp_index,
1947                        filter_param);
1948     gen6_pp_states_setup(ctx, pp_context);
1949     gen6_pp_pipeline_setup(ctx, pp_context);
1950 }
1951
1952 static void
1953 i965_post_processing_internal(
1954     VADriverContextP   ctx,
1955     struct i965_post_processing_context *pp_context,
1956     const struct i965_surface *src_surface,
1957     const VARectangle *src_rect,
1958     const struct i965_surface *dst_surface,
1959     const VARectangle *dst_rect,
1960     int                pp_index,
1961     void *filter_param
1962 )
1963 {
1964     struct i965_driver_data *i965 = i965_driver_data(ctx);
1965
1966     if (IS_GEN6(i965->intel.device_id) ||
1967         IS_GEN7(i965->intel.device_id))
1968         gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
1969     else
1970         ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
1971 }
1972
1973 VAStatus 
1974 i965_DestroySurfaces(VADriverContextP ctx,
1975                      VASurfaceID *surface_list,
1976                      int num_surfaces);
1977 VAStatus 
1978 i965_CreateSurfaces(VADriverContextP ctx,
1979                     int width,
1980                     int height,
1981                     int format,
1982                     int num_surfaces,
1983                     VASurfaceID *surfaces);
1984 VASurfaceID
1985 i965_post_processing(
1986     VADriverContextP   ctx,
1987     VASurfaceID        surface,
1988     const VARectangle *src_rect,
1989     const VARectangle *dst_rect,
1990     unsigned int       flags,
1991     int               *has_done_scaling  
1992 )
1993 {
1994     struct i965_driver_data *i965 = i965_driver_data(ctx);
1995     VASurfaceID in_surface_id = surface;
1996     VASurfaceID out_surface_id = VA_INVALID_ID;
1997     
1998     *has_done_scaling = 0;
1999
2000     if (HAS_PP(i965)) {
2001         struct object_surface *obj_surface;
2002         VAStatus status;
2003         struct i965_surface src_surface;
2004         struct i965_surface dst_surface;
2005
2006         obj_surface = SURFACE(in_surface_id);
2007
2008         /* Currently only support post processing for NV12 surface */
2009         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
2010             return out_surface_id;
2011
2012         if (flags & I965_PP_FLAG_DEINTERLACING) {
2013             status = i965_CreateSurfaces(ctx,
2014                                          obj_surface->orig_width,
2015                                          obj_surface->orig_height,
2016                                          VA_RT_FORMAT_YUV420,
2017                                          1,
2018                                          &out_surface_id);
2019             assert(status == VA_STATUS_SUCCESS);
2020             obj_surface = SURFACE(out_surface_id);
2021             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2022
2023             src_surface.id = in_surface_id;
2024             src_surface.flag = I965_SURFACE_SURFACE;
2025             dst_surface.id = out_surface_id;
2026             dst_surface.flag = I965_SURFACE_SURFACE;
2027
2028             i965_post_processing_internal(ctx, i965->pp_context,
2029                                           &src_surface,
2030                                           src_rect,
2031                                           &dst_surface,
2032                                           dst_rect,
2033                                           PP_NV12_DNDI,
2034                                           NULL);
2035         }
2036
2037         if (flags & I965_PP_FLAG_AVS) {
2038             struct i965_render_state *render_state = &i965->render_state;
2039             struct intel_region *dest_region = render_state->draw_region;
2040
2041             if (out_surface_id != VA_INVALID_ID)
2042                 in_surface_id = out_surface_id;
2043
2044             status = i965_CreateSurfaces(ctx,
2045                                          dest_region->width,
2046                                          dest_region->height,
2047                                          VA_RT_FORMAT_YUV420,
2048                                          1,
2049                                          &out_surface_id);
2050             assert(status == VA_STATUS_SUCCESS);
2051             obj_surface = SURFACE(out_surface_id);
2052             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2053
2054             src_surface.id = in_surface_id;
2055             src_surface.flag = I965_SURFACE_SURFACE;
2056             dst_surface.id = out_surface_id;
2057             dst_surface.flag = I965_SURFACE_SURFACE;
2058
2059             i965_post_processing_internal(ctx, i965->pp_context,
2060                                           &src_surface,
2061                                           src_rect,
2062                                           &dst_surface,
2063                                           dst_rect,
2064                                           PP_NV12_AVS,
2065                                           NULL);
2066
2067             if (in_surface_id != surface)
2068                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
2069                 
2070             *has_done_scaling = 1;
2071         }
2072     }
2073
2074     return out_surface_id;
2075 }       
2076
2077 static VAStatus
2078 i965_image_i420_processing(VADriverContextP ctx,
2079                            const struct i965_surface *src_surface,
2080                            const VARectangle *src_rect,
2081                            const struct i965_surface *dst_surface,
2082                            const VARectangle *dst_rect)
2083 {
2084     struct i965_driver_data *i965 = i965_driver_data(ctx);
2085     struct i965_post_processing_context *pp_context = i965->pp_context;
2086     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
2087
2088     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
2089         i965_post_processing_internal(ctx, i965->pp_context,
2090                                       src_surface,
2091                                       src_rect,
2092                                       dst_surface,
2093                                       dst_rect,
2094                                       PP_PL3_LOAD_SAVE_N12,
2095                                       NULL);
2096     } else {
2097         i965_post_processing_internal(ctx, i965->pp_context,
2098                                       src_surface,
2099                                       src_rect,
2100                                       dst_surface,
2101                                       dst_rect,
2102                                       PP_PL3_LOAD_SAVE_PL3,
2103                                       NULL);
2104     }
2105
2106     intel_batchbuffer_flush(pp_context->batch);
2107
2108     return VA_STATUS_SUCCESS;
2109 }
2110
2111 static VAStatus
2112 i965_image_nv12_processing(VADriverContextP ctx,
2113                            const struct i965_surface *src_surface,
2114                            const VARectangle *src_rect,
2115                            const struct i965_surface *dst_surface,
2116                            const VARectangle *dst_rect)
2117 {
2118     struct i965_driver_data *i965 = i965_driver_data(ctx);
2119     struct i965_post_processing_context *pp_context = i965->pp_context;
2120     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
2121
2122     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
2123         i965_post_processing_internal(ctx, i965->pp_context,
2124                                       src_surface,
2125                                       src_rect,
2126                                       dst_surface,
2127                                       dst_rect,
2128                                       PP_NV12_LOAD_SAVE_N12,
2129                                       NULL);
2130     } else {
2131         i965_post_processing_internal(ctx, i965->pp_context,
2132                                       src_surface,
2133                                       src_rect,
2134                                       dst_surface,
2135                                       dst_rect,
2136                                       PP_NV12_LOAD_SAVE_PL3,
2137                                       NULL);
2138     }
2139
2140     intel_batchbuffer_flush(pp_context->batch);
2141
2142     return VA_STATUS_SUCCESS;
2143 }
2144
2145 VAStatus
2146 i965_image_processing(VADriverContextP ctx,
2147                       const struct i965_surface *src_surface,
2148                       const VARectangle *src_rect,
2149                       const struct i965_surface *dst_surface,
2150                       const VARectangle *dst_rect)
2151 {
2152     struct i965_driver_data *i965 = i965_driver_data(ctx);
2153     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
2154
2155     if (HAS_PP(i965)) {
2156         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
2157
2158         switch (fourcc) {
2159         case VA_FOURCC('Y', 'V', '1', '2'):
2160         case VA_FOURCC('I', '4', '2', '0'):
2161             status = i965_image_i420_processing(ctx,
2162                                                 src_surface,
2163                                                 src_rect,
2164                                                 dst_surface,
2165                                                 dst_rect);
2166             break;
2167
2168         case  VA_FOURCC('N', 'V', '1', '2'):
2169             status = i965_image_nv12_processing(ctx,
2170                                                 src_surface,
2171                                                 src_rect,
2172                                                 dst_surface,
2173                                                 dst_rect);
2174             break;
2175
2176         default:
2177             status = VA_STATUS_ERROR_UNIMPLEMENTED;
2178             break;
2179         }
2180     }
2181
2182     return status;
2183 }       
2184
2185 static void
2186 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
2187 {
2188     int i;
2189
2190     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
2191     pp_context->surface_state_binding_table.bo = NULL;
2192
2193     dri_bo_unreference(pp_context->curbe.bo);
2194     pp_context->curbe.bo = NULL;
2195
2196     dri_bo_unreference(pp_context->sampler_state_table.bo);
2197     pp_context->sampler_state_table.bo = NULL;
2198
2199     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2200     pp_context->sampler_state_table.bo_8x8 = NULL;
2201
2202     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2203     pp_context->sampler_state_table.bo_8x8_uv = NULL;
2204
2205     dri_bo_unreference(pp_context->idrt.bo);
2206     pp_context->idrt.bo = NULL;
2207     pp_context->idrt.num_interface_descriptors = 0;
2208
2209     dri_bo_unreference(pp_context->vfe_state.bo);
2210     pp_context->vfe_state.bo = NULL;
2211
2212     dri_bo_unreference(pp_context->stmm.bo);
2213     pp_context->stmm.bo = NULL;
2214
2215     for (i = 0; i < NUM_PP_MODULES; i++) {
2216         struct pp_module *pp_module = &pp_context->pp_modules[i];
2217
2218         dri_bo_unreference(pp_module->kernel.bo);
2219         pp_module->kernel.bo = NULL;
2220     }
2221
2222 }
2223
2224 Bool
2225 i965_post_processing_terminate(VADriverContextP ctx)
2226 {
2227     struct i965_driver_data *i965 = i965_driver_data(ctx);
2228     struct i965_post_processing_context *pp_context = i965->pp_context;
2229
2230     if (pp_context) {
2231         i965_post_processing_context_finalize(pp_context);
2232         free(pp_context);
2233     }
2234
2235     i965->pp_context = NULL;
2236
2237     return True;
2238 }
2239
2240 static void
2241 i965_post_processing_context_init(VADriverContextP ctx,
2242                                   struct i965_post_processing_context *pp_context,
2243                                   struct intel_batchbuffer *batch)
2244 {
2245     struct i965_driver_data *i965 = i965_driver_data(ctx);
2246     int i;
2247
2248     pp_context->urb.size = URB_SIZE((&i965->intel));
2249     pp_context->urb.num_vfe_entries = 32;
2250     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
2251     pp_context->urb.num_cs_entries = 1;
2252     pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
2253     pp_context->urb.vfe_start = 0;
2254     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2255         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2256     assert(pp_context->urb.cs_start + 
2257            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2258
2259     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
2260     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2261
2262     if (IS_GEN6(i965->intel.device_id) ||
2263         IS_GEN7(i965->intel.device_id))
2264         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
2265     else if (IS_IRONLAKE(i965->intel.device_id))
2266         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
2267
2268     for (i = 0; i < NUM_PP_MODULES; i++) {
2269         struct pp_module *pp_module = &pp_context->pp_modules[i];
2270         dri_bo_unreference(pp_module->kernel.bo);
2271         if (pp_module->kernel.bin) {
2272             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
2273                                                 pp_module->kernel.name,
2274                                                 pp_module->kernel.size,
2275                                                 4096);
2276             assert(pp_module->kernel.bo);
2277             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
2278         } else {
2279             pp_module->kernel.bo = NULL;
2280         }
2281     }
2282
2283     pp_context->batch = batch;
2284 }
2285
2286 Bool
2287 i965_post_processing_init(VADriverContextP ctx)
2288 {
2289     struct i965_driver_data *i965 = i965_driver_data(ctx);
2290     struct i965_post_processing_context *pp_context = i965->pp_context;
2291
2292     if (HAS_PP(i965)) {
2293         if (pp_context == NULL) {
2294             pp_context = calloc(1, sizeof(*pp_context));
2295             i965_post_processing_context_init(ctx, pp_context, i965->batch);
2296             i965->pp_context = pp_context;
2297         }
2298     }
2299
2300     return True;
2301 }
2302
2303 static const int procfilter_to_pp_flag[10] = {
2304     PP_NULL,    /* VAProcFilterNone */
2305     PP_NULL,    /* VAProcFilterDering */
2306     PP_NULL,    /* VAProcFilterDeblocking */
2307     PP_NV12_DNDI, /* VAProcFilterNoiseReduction */
2308     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
2309     PP_NULL,    /* VAProcFilterSharpening */
2310     PP_NULL,    /* VAProcFilterColorEnhancement */
2311     PP_NULL,    /* VAProcFilterProcAmp */
2312     PP_NULL,    /* VAProcFilterComposition */
2313     PP_NULL,    /* VAProcFilterFrameRateConversion */
2314 };
2315
2316 static void 
2317 i965_proc_picture(VADriverContextP ctx, 
2318                   VAProfile profile, 
2319                   union codec_state *codec_state,
2320                   struct hw_context *hw_context)
2321 {
2322     struct i965_driver_data *i965 = i965_driver_data(ctx);
2323     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
2324     struct proc_state *proc_state = &codec_state->proc;
2325     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
2326     VAProcInputParameterBuffer *input_param = (VAProcInputParameterBuffer *)proc_state->input_param->buffer;
2327     struct object_surface *obj_surface;
2328     struct i965_surface src_surface, dst_surface;
2329     VAStatus status;
2330     int i;
2331     VASurfaceID tmp_surfaces[VA_PROC_PIPELINE_MAX_NUM_FILTERS];
2332     int num_tmp_surfaces = 0;
2333
2334     assert(input_param->surface != VA_INVALID_ID);
2335     assert(proc_state->current_render_target != VA_INVALID_ID);
2336
2337     obj_surface = SURFACE(proc_state->current_render_target);
2338     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2339
2340     obj_surface = SURFACE(input_param->surface);
2341     assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
2342
2343     src_surface.id = input_param->surface;
2344     src_surface.flag = I965_SURFACE_SURFACE;
2345     
2346     for (i = 0; i < VA_PROC_PIPELINE_MAX_NUM_FILTERS; i++) {
2347         VAProcFilterType filter_type = pipeline_param->filter_pipeline[i];
2348         VASurfaceID out_surface_id = VA_INVALID_ID;
2349         void *filter_param = NULL;
2350
2351         if (procfilter_to_pp_flag[filter_type] != PP_NULL) {
2352             if (proc_state->filter_param[filter_type])
2353                 filter_param = proc_state->filter_param[filter_type]->buffer;
2354
2355             status = i965_CreateSurfaces(ctx,
2356                                          obj_surface->orig_width,
2357                                          obj_surface->orig_height,
2358                                          VA_RT_FORMAT_YUV420,
2359                                          1,
2360                                          &out_surface_id);
2361             assert(status == VA_STATUS_SUCCESS);
2362             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
2363             obj_surface = SURFACE(out_surface_id);
2364             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2365             dst_surface.id = out_surface_id;
2366             dst_surface.flag = I965_SURFACE_SURFACE;
2367             i965_post_processing_internal(ctx, &proc_context->pp_context,
2368                                           &src_surface,
2369                                           &input_param->region,
2370                                           &dst_surface,
2371                                           &input_param->region,
2372                                           procfilter_to_pp_flag[filter_type],
2373                                           filter_param);
2374             src_surface.id = dst_surface.id;
2375         }
2376     }
2377
2378     dst_surface.id = proc_state->current_render_target;
2379     dst_surface.flag = I965_SURFACE_SURFACE;
2380     i965_post_processing_internal(ctx, &proc_context->pp_context,
2381                                   &src_surface,
2382                                   &input_param->region,
2383                                   &dst_surface,
2384                                   &pipeline_param->output_region,
2385                                   PP_NV12_AVS,
2386                                   NULL);
2387
2388     if (num_tmp_surfaces)
2389         i965_DestroySurfaces(ctx,
2390                              tmp_surfaces,
2391                              num_tmp_surfaces);
2392
2393     intel_batchbuffer_flush(hw_context->batch);
2394 }
2395
2396 static void
2397 i965_proc_context_destroy(void *hw_context)
2398 {
2399     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
2400
2401     i965_post_processing_context_finalize(&proc_context->pp_context);
2402     intel_batchbuffer_free(proc_context->base.batch);
2403     free(proc_context);
2404 }
2405
2406 struct hw_context *
2407 i965_proc_context_init(VADriverContextP ctx, VAProfile profile)
2408 {
2409     struct intel_driver_data *intel = intel_driver_data(ctx);
2410     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
2411
2412     proc_context->base.destroy = i965_proc_context_destroy;
2413     proc_context->base.run = i965_proc_picture;
2414     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
2415     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
2416
2417     return (struct hw_context *)proc_context;
2418 }