VPP: pass the origin of source region to vpp kernel
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
/* Non-zero when the device behind ctx is one of the generations handled here. */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))

/*
 * Surface-state slot sizing.
 *
 * Each slot is ALIGN()ed to 32 and sized for the larger of the two surface
 * state layouts, on both the pre-gen7 and the gen7 structures, so a single
 * slot size is used regardless of which layout ends up being written.
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)

/*
 * Byte offset of surface-state slot `index`. The argument is parenthesized
 * so that expressions such as `base + 1` are multiplied as a whole.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))

/* The binding table starts right after the MAX_PP_SURFACES surface-state slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
57
/*
 * Gen5 post-processing kernel binaries.
 *
 * Each array is a list of 4 x 32-bit word rows whose contents come entirely
 * from the included shader file. The arrays are referenced (together with
 * their sizeof) by the pp_modules_gen5 table.
 */
58 static const uint32_t pp_null_gen5[][4] = {
59 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
60 };
61
62 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
63 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
64 };
65
66 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
68 };
69
70 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
72 };
73
74 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
76 };
77
78 static const uint32_t pp_nv12_scaling_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
80 };
81
82 static const uint32_t pp_nv12_avs_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_dndi_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_dn_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
92 };
93
94 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
95                                    const struct i965_surface *src_surface,
96                                    const VARectangle *src_rect,
97                                    struct i965_surface *dst_surface,
98                                    const VARectangle *dst_rect,
99                                    void *filter_param);
100 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
101                                             const struct i965_surface *src_surface,
102                                             const VARectangle *src_rect,
103                                             struct i965_surface *dst_surface,
104                                             const VARectangle *dst_rect,
105                                             void *filter_param);
106 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
107                                            const struct i965_surface *src_surface,
108                                            const VARectangle *src_rect,
109                                            struct i965_surface *dst_surface,
110                                            const VARectangle *dst_rect,
111                                            void *filter_param);
112 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
113                                              const struct i965_surface *src_surface,
114                                              const VARectangle *src_rect,
115                                              struct i965_surface *dst_surface,
116                                              const VARectangle *dst_rect,
117                                              void *filter_param);
118 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
119                                                 const struct i965_surface *src_surface,
120                                                 const VARectangle *src_rect,
121                                                 struct i965_surface *dst_surface,
122                                                 const VARectangle *dst_rect,
123                                                 void *filter_param);
124 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
125                                         const struct i965_surface *src_surface,
126                                         const VARectangle *src_rect,
127                                         struct i965_surface *dst_surface,
128                                         const VARectangle *dst_rect,
129                                         void *filter_param);
130 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
131                                       const struct i965_surface *src_surface,
132                                       const VARectangle *src_rect,
133                                       struct i965_surface *dst_surface,
134                                       const VARectangle *dst_rect,
135                                       void *filter_param);
136
137 static struct pp_module pp_modules_gen5[] = {
138     {
139         {
140             "NULL module (for testing)",
141             PP_NULL,
142             pp_null_gen5,
143             sizeof(pp_null_gen5),
144             NULL,
145         },
146
147         pp_null_initialize,
148     },
149
150     {
151         {
152             "NV12_NV12",
153             PP_NV12_LOAD_SAVE_N12,
154             pp_nv12_load_save_nv12_gen5,
155             sizeof(pp_nv12_load_save_nv12_gen5),
156             NULL,
157         },
158
159         pp_plx_load_save_plx_initialize,
160     },
161
162     {
163         {
164             "NV12_PL3",
165             PP_NV12_LOAD_SAVE_PL3,
166             pp_nv12_load_save_pl3_gen5,
167             sizeof(pp_nv12_load_save_pl3_gen5),
168             NULL,
169         },
170
171         pp_plx_load_save_plx_initialize,
172     },
173
174     {
175         {
176             "PL3_NV12",
177             PP_PL3_LOAD_SAVE_N12,
178             pp_pl3_load_save_nv12_gen5,
179             sizeof(pp_pl3_load_save_nv12_gen5),
180             NULL,
181         },
182
183         pp_plx_load_save_plx_initialize,
184     },
185
186     {
187         {
188             "PL3_PL3",
189             PP_PL3_LOAD_SAVE_N12,
190             pp_pl3_load_save_pl3_gen5,
191             sizeof(pp_pl3_load_save_pl3_gen5),
192             NULL,
193         },
194
195         pp_plx_load_save_plx_initialize
196     },
197
198     {
199         {
200             "NV12 Scaling module",
201             PP_NV12_SCALING,
202             pp_nv12_scaling_gen5,
203             sizeof(pp_nv12_scaling_gen5),
204             NULL,
205         },
206
207         pp_nv12_scaling_initialize,
208     },
209
210     {
211         {
212             "NV12 AVS module",
213             PP_NV12_AVS,
214             pp_nv12_avs_gen5,
215             sizeof(pp_nv12_avs_gen5),
216             NULL,
217         },
218
219         pp_nv12_avs_initialize_nlas,
220     },
221
222     {
223         {
224             "NV12 DNDI module",
225             PP_NV12_DNDI,
226             pp_nv12_dndi_gen5,
227             sizeof(pp_nv12_dndi_gen5),
228             NULL,
229         },
230
231         pp_nv12_dndi_initialize,
232     },
233
234     {
235         {
236             "NV12 DN module",
237             PP_NV12_DN,
238             pp_nv12_dn_gen5,
239             sizeof(pp_nv12_dn_gen5),
240             NULL,
241         },
242
243         pp_nv12_dn_initialize,
244     },
245 };
246
/*
 * Gen6 post-processing kernel binaries.
 *
 * Each array is a list of 4 x 32-bit word rows whose contents come entirely
 * from the included shader file. The arrays are referenced (together with
 * their sizeof) by the pp_modules_gen6 table.
 *
 * Note that pp_nv12_scaling_gen6 and pp_nv12_avs_gen6 include the same
 * nv12_avs_nv12.g6b file.
 */
247 static const uint32_t pp_null_gen6[][4] = {
248 #include "shaders/post_processing/gen5_6/null.g6b"
249 };
250
251 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
252 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
253 };
254
255 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
256 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
257 };
258
259 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
260 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
261 };
262
263 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
264 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
265 };
266
267 static const uint32_t pp_nv12_scaling_gen6[][4] = {
268 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
269 };
270
271 static const uint32_t pp_nv12_avs_gen6[][4] = {
272 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
273 };
274
275 static const uint32_t pp_nv12_dndi_gen6[][4] = {
276 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
277 };
278
279 static const uint32_t pp_nv12_dn_gen6[][4] = {
280 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
281 };
282
283 static struct pp_module pp_modules_gen6[] = {
284     {
285         {
286             "NULL module (for testing)",
287             PP_NULL,
288             pp_null_gen6,
289             sizeof(pp_null_gen6),
290             NULL,
291         },
292
293         pp_null_initialize,
294     },
295
296     {
297         {
298             "NV12_NV12",
299             PP_NV12_LOAD_SAVE_N12,
300             pp_nv12_load_save_nv12_gen6,
301             sizeof(pp_nv12_load_save_nv12_gen6),
302             NULL,
303         },
304
305         pp_plx_load_save_plx_initialize,
306     },
307
308     {
309         {
310             "NV12_PL3",
311             PP_NV12_LOAD_SAVE_PL3,
312             pp_nv12_load_save_pl3_gen6,
313             sizeof(pp_nv12_load_save_pl3_gen6),
314             NULL,
315         },
316         
317         pp_plx_load_save_plx_initialize,
318     },
319
320     {
321         {
322             "PL3_NV12",
323             PP_PL3_LOAD_SAVE_N12,
324             pp_pl3_load_save_nv12_gen6,
325             sizeof(pp_pl3_load_save_nv12_gen6),
326             NULL,
327         },
328
329         pp_plx_load_save_plx_initialize,
330     },
331
332     {
333         {
334             "PL3_PL3",
335             PP_PL3_LOAD_SAVE_N12,
336             pp_pl3_load_save_pl3_gen6,
337             sizeof(pp_pl3_load_save_pl3_gen6),
338             NULL,
339         },
340
341         pp_plx_load_save_plx_initialize,
342     },
343
344     {
345         {
346             "NV12 Scaling module",
347             PP_NV12_SCALING,
348             pp_nv12_scaling_gen6,
349             sizeof(pp_nv12_scaling_gen6),
350             NULL,
351         },
352
353         gen6_nv12_scaling_initialize,
354     },
355
356     {
357         {
358             "NV12 AVS module",
359             PP_NV12_AVS,
360             pp_nv12_avs_gen6,
361             sizeof(pp_nv12_avs_gen6),
362             NULL,
363         },
364
365         pp_nv12_avs_initialize_nlas,
366     },
367
368     {
369         {
370             "NV12 DNDI module",
371             PP_NV12_DNDI,
372             pp_nv12_dndi_gen6,
373             sizeof(pp_nv12_dndi_gen6),
374             NULL,
375         },
376
377         pp_nv12_dndi_initialize,
378     },
379
380     {
381         {
382             "NV12 DN module",
383             PP_NV12_DN,
384             pp_nv12_dn_gen6,
385             sizeof(pp_nv12_dn_gen6),
386             NULL,
387         },
388
389         pp_nv12_dn_initialize,
390     },
391 };
392
/*
 * Gen7 post-processing kernel binaries.
 *
 * Arrays with an #include take their contents from that shader file.
 * pp_null_gen7, pp_nv12_dndi_gen7 and pp_nv12_dn_gen7 have no initializer
 * contents here (the dndi include is commented out), so sizeof() of those
 * arrays as used by pp_modules_gen7 covers no data.
 * NOTE(review): an empty brace initializer for an array of unknown size
 * relies on a compiler extension before C23 -- confirm the supported
 * toolchains.
 *
 * pp_nv12_scaling_gen7 and pp_nv12_avs_gen7 include the same avs.g7b file.
 */
393 static const uint32_t pp_null_gen7[][4] = {
394 };
395
396 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
397 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
398 };
399
400 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
401 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
402 };
403
404 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
405 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
406 };
407
408 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
409 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
410 };
411
412 static const uint32_t pp_nv12_scaling_gen7[][4] = {
413 #include "shaders/post_processing/gen7/avs.g7b"
414 };
415
416 static const uint32_t pp_nv12_avs_gen7[][4] = {
417 #include "shaders/post_processing/gen7/avs.g7b"
418 };
419
420 static const uint32_t pp_nv12_dndi_gen7[][4] = {
421 // #include "shaders/post_processing/gen7/dndi.g7b"
422 };
423
424 static const uint32_t pp_nv12_dn_gen7[][4] = {
425 };
426
427 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
428                                            const struct i965_surface *src_surface,
429                                            const VARectangle *src_rect,
430                                            struct i965_surface *dst_surface,
431                                            const VARectangle *dst_rect,
432                                            void *filter_param);
433 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
434                                              const struct i965_surface *src_surface,
435                                              const VARectangle *src_rect,
436                                              struct i965_surface *dst_surface,
437                                              const VARectangle *dst_rect,
438                                              void *filter_param);
439 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
440                                            const struct i965_surface *src_surface,
441                                            const VARectangle *src_rect,
442                                            struct i965_surface *dst_surface,
443                                            const VARectangle *dst_rect,
444                                            void *filter_param);
445
446 static struct pp_module pp_modules_gen7[] = {
447     {
448         {
449             "NULL module (for testing)",
450             PP_NULL,
451             pp_null_gen7,
452             sizeof(pp_null_gen7),
453             NULL,
454         },
455
456         pp_null_initialize,
457     },
458
459     {
460         {
461             "NV12_NV12",
462             PP_NV12_LOAD_SAVE_N12,
463             pp_nv12_load_save_nv12_gen7,
464             sizeof(pp_nv12_load_save_nv12_gen7),
465             NULL,
466         },
467
468         gen7_pp_plx_avs_initialize,
469     },
470
471     {
472         {
473             "NV12_PL3",
474             PP_NV12_LOAD_SAVE_PL3,
475             pp_nv12_load_save_pl3_gen7,
476             sizeof(pp_nv12_load_save_pl3_gen7),
477             NULL,
478         },
479         
480         gen7_pp_plx_avs_initialize,
481     },
482
483     {
484         {
485             "PL3_NV12",
486             PP_PL3_LOAD_SAVE_N12,
487             pp_pl3_load_save_nv12_gen7,
488             sizeof(pp_pl3_load_save_nv12_gen7),
489             NULL,
490         },
491
492         gen7_pp_plx_avs_initialize,
493     },
494
495     {
496         {
497             "PL3_PL3",
498             PP_PL3_LOAD_SAVE_N12,
499             pp_pl3_load_save_pl3_gen7,
500             sizeof(pp_pl3_load_save_pl3_gen7),
501             NULL,
502         },
503
504         gen7_pp_plx_avs_initialize,
505     },
506
507     {
508         {
509             "NV12 Scaling module",
510             PP_NV12_SCALING,
511             pp_nv12_scaling_gen7,
512             sizeof(pp_nv12_scaling_gen7),
513             NULL,
514         },
515
516         gen7_pp_plx_avs_initialize,
517     },
518
519     {
520         {
521             "NV12 AVS module",
522             PP_NV12_AVS,
523             pp_nv12_avs_gen7,
524             sizeof(pp_nv12_avs_gen7),
525             NULL,
526         },
527
528         gen7_pp_plx_avs_initialize,
529     },
530
531     {
532         {
533             "NV12 DNDI module",
534             PP_NV12_DNDI,
535             pp_nv12_dndi_gen7,
536             sizeof(pp_nv12_dndi_gen7),
537             NULL,
538         },
539
540         gen7_pp_nv12_dndi_initialize,
541     },
542
543     {
544         {
545             "NV12 DN module",
546             PP_NV12_DN,
547             pp_nv12_dn_gen7,
548             sizeof(pp_nv12_dn_gen7),
549             NULL,
550         },
551
552         gen7_pp_nv12_dn_initialize,
553     },
554 };
555
556 static int
557 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
558 {
559     struct i965_driver_data *i965 = i965_driver_data(ctx);
560     int fourcc;
561
562     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
563         struct object_image *obj_image = IMAGE(surface->id);
564         fourcc = obj_image->image.format.fourcc;
565     } else {
566         struct object_surface *obj_surface = SURFACE(surface->id);
567         fourcc = obj_surface->fourcc;
568     }
569
570     return fourcc;
571 }
572
573 static void
574 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
575 {
576     switch (tiling) {
577     case I915_TILING_NONE:
578         ss->ss3.tiled_surface = 0;
579         ss->ss3.tile_walk = 0;
580         break;
581     case I915_TILING_X:
582         ss->ss3.tiled_surface = 1;
583         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
584         break;
585     case I915_TILING_Y:
586         ss->ss3.tiled_surface = 1;
587         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
588         break;
589     }
590 }
591
592 static void
593 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
594 {
595     switch (tiling) {
596     case I915_TILING_NONE:
597         ss->ss2.tiled_surface = 0;
598         ss->ss2.tile_walk = 0;
599         break;
600     case I915_TILING_X:
601         ss->ss2.tiled_surface = 1;
602         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
603         break;
604     case I915_TILING_Y:
605         ss->ss2.tiled_surface = 1;
606         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
607         break;
608     }
609 }
610
611 static void
612 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
613 {
614     switch (tiling) {
615     case I915_TILING_NONE:
616         ss->ss0.tiled_surface = 0;
617         ss->ss0.tile_walk = 0;
618         break;
619     case I915_TILING_X:
620         ss->ss0.tiled_surface = 1;
621         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
622         break;
623     case I915_TILING_Y:
624         ss->ss0.tiled_surface = 1;
625         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
626         break;
627     }
628 }
629
630 static void
631 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
632 {
633     switch (tiling) {
634     case I915_TILING_NONE:
635         ss->ss2.tiled_surface = 0;
636         ss->ss2.tile_walk = 0;
637         break;
638     case I915_TILING_X:
639         ss->ss2.tiled_surface = 1;
640         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
641         break;
642     case I915_TILING_Y:
643         ss->ss2.tiled_surface = 1;
644         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
645         break;
646     }
647 }
648
649 static void
650 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
651 {
652     struct i965_interface_descriptor *desc;
653     dri_bo *bo;
654     int pp_index = pp_context->current_pp;
655
656     bo = pp_context->idrt.bo;
657     dri_bo_map(bo, 1);
658     assert(bo->virtual);
659     desc = bo->virtual;
660     memset(desc, 0, sizeof(*desc));
661     desc->desc0.grf_reg_blocks = 10;
662     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
663     desc->desc1.const_urb_entry_read_offset = 0;
664     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
665     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
666     desc->desc2.sampler_count = 0;
667     desc->desc3.binding_table_entry_count = 0;
668     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
669
670     dri_bo_emit_reloc(bo,
671                       I915_GEM_DOMAIN_INSTRUCTION, 0,
672                       desc->desc0.grf_reg_blocks,
673                       offsetof(struct i965_interface_descriptor, desc0),
674                       pp_context->pp_modules[pp_index].kernel.bo);
675
676     dri_bo_emit_reloc(bo,
677                       I915_GEM_DOMAIN_INSTRUCTION, 0,
678                       desc->desc2.sampler_count << 2,
679                       offsetof(struct i965_interface_descriptor, desc2),
680                       pp_context->sampler_state_table.bo);
681
682     dri_bo_unmap(bo);
683     pp_context->idrt.num_interface_descriptors++;
684 }
685
686 static void
687 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
688 {
689     struct i965_vfe_state *vfe_state;
690     dri_bo *bo;
691
692     bo = pp_context->vfe_state.bo;
693     dri_bo_map(bo, 1);
694     assert(bo->virtual);
695     vfe_state = bo->virtual;
696     memset(vfe_state, 0, sizeof(*vfe_state));
697     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
698     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
699     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
700     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
701     vfe_state->vfe1.children_present = 0;
702     vfe_state->vfe2.interface_descriptor_base = 
703         pp_context->idrt.bo->offset >> 4; /* reloc */
704     dri_bo_emit_reloc(bo,
705                       I915_GEM_DOMAIN_INSTRUCTION, 0,
706                       0,
707                       offsetof(struct i965_vfe_state, vfe2),
708                       pp_context->idrt.bo);
709     dri_bo_unmap(bo);
710 }
711
712 static void
713 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
714 {
715     unsigned char *constant_buffer;
716     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
717
718     assert(sizeof(*pp_static_parameter) == 128);
719     dri_bo_map(pp_context->curbe.bo, 1);
720     assert(pp_context->curbe.bo->virtual);
721     constant_buffer = pp_context->curbe.bo->virtual;
722     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
723     dri_bo_unmap(pp_context->curbe.bo);
724 }
725
/*
 * Fill the indirect state buffers used by the post-processing pipeline:
 * the interface descriptor, then the VFE state, then the constant buffer.
 * `ctx` is not used by this function.
 */
726 static void
727 ironlake_pp_states_setup(VADriverContextP ctx,
728                          struct i965_post_processing_context *pp_context)
729 {
730     ironlake_pp_interface_descriptor_table(pp_context);
731     ironlake_pp_vfe_state(pp_context);
732     ironlake_pp_upload_constants(pp_context);
733 }
734
735 static void
736 ironlake_pp_pipeline_select(VADriverContextP ctx,
737                             struct i965_post_processing_context *pp_context)
738 {
739     struct intel_batchbuffer *batch = pp_context->batch;
740
741     BEGIN_BATCH(batch, 1);
742     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
743     ADVANCE_BATCH(batch);
744 }
745
746 static void
747 ironlake_pp_urb_layout(VADriverContextP ctx,
748                        struct i965_post_processing_context *pp_context)
749 {
750     struct intel_batchbuffer *batch = pp_context->batch;
751     unsigned int vfe_fence, cs_fence;
752
753     vfe_fence = pp_context->urb.cs_start;
754     cs_fence = pp_context->urb.size;
755
756     BEGIN_BATCH(batch, 3);
757     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
758     OUT_BATCH(batch, 0);
759     OUT_BATCH(batch, 
760               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
761               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
762     ADVANCE_BATCH(batch);
763 }
764
765 static void
766 ironlake_pp_state_base_address(VADriverContextP ctx,
767                                struct i965_post_processing_context *pp_context)
768 {
769     struct intel_batchbuffer *batch = pp_context->batch;
770
771     BEGIN_BATCH(batch, 8);
772     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
773     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
774     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
775     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
776     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
777     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
778     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
779     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
780     ADVANCE_BATCH(batch);
781 }
782
783 static void
784 ironlake_pp_state_pointers(VADriverContextP ctx,
785                            struct i965_post_processing_context *pp_context)
786 {
787     struct intel_batchbuffer *batch = pp_context->batch;
788
789     BEGIN_BATCH(batch, 3);
790     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
791     OUT_BATCH(batch, 0);
792     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
793     ADVANCE_BATCH(batch);
794 }
795
796 static void 
797 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
798                           struct i965_post_processing_context *pp_context)
799 {
800     struct intel_batchbuffer *batch = pp_context->batch;
801
802     BEGIN_BATCH(batch, 2);
803     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
804     OUT_BATCH(batch,
805               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
806               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
807     ADVANCE_BATCH(batch);
808 }
809
810 static void
811 ironlake_pp_constant_buffer(VADriverContextP ctx,
812                             struct i965_post_processing_context *pp_context)
813 {
814     struct intel_batchbuffer *batch = pp_context->batch;
815
816     BEGIN_BATCH(batch, 2);
817     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
818     OUT_RELOC(batch, pp_context->curbe.bo,
819               I915_GEM_DOMAIN_INSTRUCTION, 0,
820               pp_context->urb.size_cs_entry - 1);
821     ADVANCE_BATCH(batch);    
822 }
823
824 static void
825 ironlake_pp_object_walker(VADriverContextP ctx,
826                           struct i965_post_processing_context *pp_context)
827 {
828     struct intel_batchbuffer *batch = pp_context->batch;
829     int x, x_steps, y, y_steps;
830     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
831
832     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
833     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
834
835     for (y = 0; y < y_steps; y++) {
836         for (x = 0; x < x_steps; x++) {
837             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
838                 BEGIN_BATCH(batch, 20);
839                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
840                 OUT_BATCH(batch, 0);
841                 OUT_BATCH(batch, 0); /* no indirect data */
842                 OUT_BATCH(batch, 0);
843
844                 /* inline data grf 5-6 */
845                 assert(sizeof(*pp_inline_parameter) == 64);
846                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
847
848                 ADVANCE_BATCH(batch);
849             }
850         }
851     }
852 }
853
/*
 * Emit the complete post-processing command sequence into the batch buffer.
 *
 * Everything is emitted inside one atomic section (started with a 0x1000
 * size argument): an MI flush, pipeline select, state base address, media
 * state pointers, URB fence, CS URB state, constant buffer, and finally the
 * per-block media objects. The order of the calls below is the order of the
 * commands in the batch.
 */
854 static void
855 ironlake_pp_pipeline_setup(VADriverContextP ctx,
856                            struct i965_post_processing_context *pp_context)
857 {
858     struct intel_batchbuffer *batch = pp_context->batch;
859
860     intel_batchbuffer_start_atomic(batch, 0x1000);
861     intel_batchbuffer_emit_mi_flush(batch);
862     ironlake_pp_pipeline_select(ctx, pp_context);
863     ironlake_pp_state_base_address(ctx, pp_context);
864     ironlake_pp_state_pointers(ctx, pp_context);
865     ironlake_pp_urb_layout(ctx, pp_context);
866     ironlake_pp_cs_urb_layout(ctx, pp_context);
867     ironlake_pp_constant_buffer(ctx, pp_context);
868     ironlake_pp_object_walker(ctx, pp_context);
869     intel_batchbuffer_end_atomic(batch);
870 }
871
872 static void
873 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
874                           dri_bo *surf_bo, unsigned long surf_bo_offset,
875                           int width, int height, int pitch, int format, 
876                           int index, int is_target)
877 {
878     struct i965_surface_state *ss;
879     dri_bo *ss_bo;
880     unsigned int tiling;
881     unsigned int swizzle;
882
883     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
884     ss_bo = pp_context->surface_state_binding_table.bo;
885     assert(ss_bo);
886
887     dri_bo_map(ss_bo, True);
888     assert(ss_bo->virtual);
889     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
890     memset(ss, 0, sizeof(*ss));
891     ss->ss0.surface_type = I965_SURFACE_2D;
892     ss->ss0.surface_format = format;
893     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
894     ss->ss2.width = width - 1;
895     ss->ss2.height = height - 1;
896     ss->ss3.pitch = pitch - 1;
897     pp_set_surface_tiling(ss, tiling);
898     dri_bo_emit_reloc(ss_bo,
899                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
900                       surf_bo_offset,
901                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
902                       surf_bo);
903     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
904     dri_bo_unmap(ss_bo);
905 }
906
907 static void
908 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
909                            dri_bo *surf_bo, unsigned long surf_bo_offset,
910                            int width, int height, int wpitch,
911                            int xoffset, int yoffset,
912                            int format, int interleave_chroma,
913                            int index)
914 {
915     struct i965_surface_state2 *ss2;
916     dri_bo *ss2_bo;
917     unsigned int tiling;
918     unsigned int swizzle;
919
920     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
921     ss2_bo = pp_context->surface_state_binding_table.bo;
922     assert(ss2_bo);
923
924     dri_bo_map(ss2_bo, True);
925     assert(ss2_bo->virtual);
926     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
927     memset(ss2, 0, sizeof(*ss2));
928     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
929     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
930     ss2->ss1.width = width - 1;
931     ss2->ss1.height = height - 1;
932     ss2->ss2.pitch = wpitch - 1;
933     ss2->ss2.interleave_chroma = interleave_chroma;
934     ss2->ss2.surface_format = format;
935     ss2->ss3.x_offset_for_cb = xoffset;
936     ss2->ss3.y_offset_for_cb = yoffset;
937     pp_set_surface2_tiling(ss2, tiling);
938     dri_bo_emit_reloc(ss2_bo,
939                       I915_GEM_DOMAIN_RENDER, 0,
940                       surf_bo_offset,
941                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
942                       surf_bo);
943     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
944     dri_bo_unmap(ss2_bo);
945 }
946
947 static void
948 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
949                           dri_bo *surf_bo, unsigned long surf_bo_offset,
950                           int width, int height, int pitch, int format, 
951                           int index, int is_target)
952 {
953     struct gen7_surface_state *ss;
954     dri_bo *ss_bo;
955     unsigned int tiling;
956     unsigned int swizzle;
957
958     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
959     ss_bo = pp_context->surface_state_binding_table.bo;
960     assert(ss_bo);
961
962     dri_bo_map(ss_bo, True);
963     assert(ss_bo->virtual);
964     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
965     memset(ss, 0, sizeof(*ss));
966     ss->ss0.surface_type = I965_SURFACE_2D;
967     ss->ss0.surface_format = format;
968     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
969     ss->ss2.width = width - 1;
970     ss->ss2.height = height - 1;
971     ss->ss3.pitch = pitch - 1;
972     gen7_pp_set_surface_tiling(ss, tiling);
973     dri_bo_emit_reloc(ss_bo,
974                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
975                       surf_bo_offset,
976                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
977                       surf_bo);
978     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
979     dri_bo_unmap(ss_bo);
980 }
981
982 static void
983 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
984                            dri_bo *surf_bo, unsigned long surf_bo_offset,
985                            int width, int height, int wpitch,
986                            int xoffset, int yoffset,
987                            int format, int interleave_chroma,
988                            int index)
989 {
990     struct gen7_surface_state2 *ss2;
991     dri_bo *ss2_bo;
992     unsigned int tiling;
993     unsigned int swizzle;
994
995     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
996     ss2_bo = pp_context->surface_state_binding_table.bo;
997     assert(ss2_bo);
998
999     dri_bo_map(ss2_bo, True);
1000     assert(ss2_bo->virtual);
1001     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1002     memset(ss2, 0, sizeof(*ss2));
1003     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1004     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1005     ss2->ss1.width = width - 1;
1006     ss2->ss1.height = height - 1;
1007     ss2->ss2.pitch = wpitch - 1;
1008     ss2->ss2.interleave_chroma = interleave_chroma;
1009     ss2->ss2.surface_format = format;
1010     ss2->ss3.x_offset_for_cb = xoffset;
1011     ss2->ss3.y_offset_for_cb = yoffset;
1012     gen7_pp_set_surface2_tiling(ss2, tiling);
1013     dri_bo_emit_reloc(ss2_bo,
1014                       I915_GEM_DOMAIN_RENDER, 0,
1015                       surf_bo_offset,
1016                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1017                       surf_bo);
1018     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1019     dri_bo_unmap(ss2_bo);
1020 }
1021
1022 static void 
1023 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1024                                 const struct i965_surface *surface, 
1025                                 int base_index, int is_target,
1026                                 int *width, int *height, int *pitch, int *offset)
1027 {
1028     struct i965_driver_data *i965 = i965_driver_data(ctx);
1029     struct object_surface *obj_surface;
1030     struct object_image *obj_image;
1031     dri_bo *bo;
1032     int fourcc = pp_get_surface_fourcc(ctx, surface);
1033     const int Y = 0;
1034     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1035     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1036     const int UV = 1;
1037     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1038
1039     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1040         obj_surface = SURFACE(surface->id);
1041         bo = obj_surface->bo;
1042         width[0] = obj_surface->orig_width;
1043         height[0] = obj_surface->orig_height;
1044         pitch[0] = obj_surface->width;
1045         offset[0] = 0;
1046
1047         if (interleaved_uv) {
1048             width[1] = obj_surface->orig_width;
1049             height[1] = obj_surface->orig_height / 2;
1050             pitch[1] = obj_surface->width;
1051             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1052         } else {
1053             width[1] = obj_surface->orig_width / 2;
1054             height[1] = obj_surface->orig_height / 2;
1055             pitch[1] = obj_surface->width / 2;
1056             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1057             width[2] = obj_surface->orig_width / 2;
1058             height[2] = obj_surface->orig_height / 2;
1059             pitch[2] = obj_surface->width / 2;
1060             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1061         }
1062     } else {
1063         obj_image = IMAGE(surface->id);
1064         bo = obj_image->bo;
1065         width[0] = obj_image->image.width;
1066         height[0] = obj_image->image.height;
1067         pitch[0] = obj_image->image.pitches[0];
1068         offset[0] = obj_image->image.offsets[0];
1069
1070         if (interleaved_uv) {
1071             width[1] = obj_image->image.width;
1072             height[1] = obj_image->image.height / 2;
1073             pitch[1] = obj_image->image.pitches[1];
1074             offset[1] = obj_image->image.offsets[1];
1075         } else {
1076             width[1] = obj_image->image.width / 2;
1077             height[1] = obj_image->image.height / 2;
1078             pitch[1] = obj_image->image.pitches[1];
1079             offset[1] = obj_image->image.offsets[1];
1080             width[2] = obj_image->image.width / 2;
1081             height[2] = obj_image->image.height / 2;
1082             pitch[2] = obj_image->image.pitches[2];
1083             offset[2] = obj_image->image.offsets[2];
1084         }
1085     }
1086
1087     /* Y surface */
1088     i965_pp_set_surface_state(ctx, pp_context,
1089                               bo, offset[Y],
1090                               width[Y] / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1091                               base_index, is_target);
1092
1093     if (interleaved_uv) {
1094         i965_pp_set_surface_state(ctx, pp_context,
1095                                   bo, offset[UV],
1096                                   width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1097                                   base_index + 1, is_target);
1098     } else {
1099         /* U surface */
1100         i965_pp_set_surface_state(ctx, pp_context,
1101                                   bo, offset[U],
1102                                   width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1103                                   base_index + 1, is_target);
1104
1105         /* V surface */
1106         i965_pp_set_surface_state(ctx, pp_context,
1107                                   bo, offset[V],
1108                                   width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1109                                   base_index + 2, is_target);
1110     }
1111
1112 }
1113
1114 static void 
1115 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1116                                      const struct i965_surface *surface, 
1117                                      int base_index, int is_target,
1118                                      int *width, int *height, int *pitch, int *offset)
1119 {
1120     struct i965_driver_data *i965 = i965_driver_data(ctx);
1121     struct object_surface *obj_surface;
1122     struct object_image *obj_image;
1123     dri_bo *bo;
1124     int fourcc = pp_get_surface_fourcc(ctx, surface);
1125     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1126                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1127     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1128                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1129     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1130
1131     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1132         obj_surface = SURFACE(surface->id);
1133         bo = obj_surface->bo;
1134         width[0] = obj_surface->orig_width;
1135         height[0] = obj_surface->orig_height;
1136         pitch[0] = obj_surface->width;
1137         offset[0] = 0;
1138
1139         width[1] = obj_surface->cb_cr_width;
1140         height[1] = obj_surface->cb_cr_height;
1141         pitch[1] = obj_surface->cb_cr_pitch;
1142         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1143
1144         width[2] = obj_surface->cb_cr_width;
1145         height[2] = obj_surface->cb_cr_height;
1146         pitch[2] = obj_surface->cb_cr_pitch;
1147         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1148     } else {
1149         obj_image = IMAGE(surface->id);
1150         bo = obj_image->bo;
1151         width[0] = obj_image->image.width;
1152         height[0] = obj_image->image.height;
1153         pitch[0] = obj_image->image.pitches[0];
1154         offset[0] = obj_image->image.offsets[0];
1155
1156         if (interleaved_uv) {
1157             width[1] = obj_image->image.width;
1158             height[1] = obj_image->image.height / 2;
1159             pitch[1] = obj_image->image.pitches[1];
1160             offset[1] = obj_image->image.offsets[1];
1161         } else {
1162             width[1] = obj_image->image.width / 2;
1163             height[1] = obj_image->image.height / 2;
1164             pitch[1] = obj_image->image.pitches[U];
1165             offset[1] = obj_image->image.offsets[U];
1166             width[2] = obj_image->image.width / 2;
1167             height[2] = obj_image->image.height / 2;
1168             pitch[2] = obj_image->image.pitches[V];
1169             offset[2] = obj_image->image.offsets[V];
1170         }
1171     }
1172
1173     if (is_target) {
1174         gen7_pp_set_surface_state(ctx, pp_context,
1175                                   bo, 0,
1176                                   width[0] / 4, height[0], pitch[0],
1177                                   I965_SURFACEFORMAT_R8_SINT,
1178                                   base_index, 1);
1179
1180         if (interleaved_uv) {
1181             gen7_pp_set_surface_state(ctx, pp_context,
1182                                       bo, offset[1],
1183                                       width[1] / 2, height[1], pitch[1],
1184                                       I965_SURFACEFORMAT_R8G8_SINT,
1185                                       base_index + 1, 1);
1186         } else {
1187             gen7_pp_set_surface_state(ctx, pp_context,
1188                                       bo, offset[1],
1189                                       width[1] / 4, height[1], pitch[1],
1190                                       I965_SURFACEFORMAT_R8_SINT,
1191                                       base_index + 1, 1);
1192             gen7_pp_set_surface_state(ctx, pp_context,
1193                                       bo, offset[2],
1194                                       width[2] / 4, height[2], pitch[2],
1195                                       I965_SURFACEFORMAT_R8_SINT,
1196                                       base_index + 2, 1);
1197         }
1198     } else {
1199         gen7_pp_set_surface2_state(ctx, pp_context,
1200                                    bo, offset[0],
1201                                    width[0], height[0], pitch[0],
1202                                    0, 0,
1203                                    SURFACE_FORMAT_Y8_UNORM, 0,
1204                                    base_index);
1205
1206         if (interleaved_uv) {
1207             gen7_pp_set_surface2_state(ctx, pp_context,
1208                                        bo, offset[1],
1209                                        width[1], height[1], pitch[1],
1210                                        0, 0,
1211                                        SURFACE_FORMAT_R8B8_UNORM, 0,
1212                                        base_index + 1);
1213         } else {
1214             gen7_pp_set_surface2_state(ctx, pp_context,
1215                                        bo, offset[1],
1216                                        width[1], height[1], pitch[1],
1217                                        0, 0,
1218                                        SURFACE_FORMAT_R8_UNORM, 0,
1219                                        base_index + 1);
1220             gen7_pp_set_surface2_state(ctx, pp_context,
1221                                        bo, offset[2],
1222                                        width[2], height[2], pitch[2],
1223                                        0, 0,
1224                                        SURFACE_FORMAT_R8_UNORM, 0,
1225                                        base_index + 2);
1226         }
1227     }
1228 }
1229
/* Horizontal step count for the null module: always 1; the context is not read. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1235
/* Vertical step count for the null module: always 1; the context is not read. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1241
/* Per-block hook for the null module: touches nothing and returns 0. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1247
1248 static VAStatus
1249 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1250                    const struct i965_surface *src_surface,
1251                    const VARectangle *src_rect,
1252                    struct i965_surface *dst_surface,
1253                    const VARectangle *dst_rect,
1254                    void *filter_param)
1255 {
1256     /* private function & data */
1257     pp_context->pp_x_steps = pp_null_x_steps;
1258     pp_context->pp_y_steps = pp_null_y_steps;
1259     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1260
1261     dst_surface->flags = src_surface->flags;
1262
1263     return VA_STATUS_SUCCESS;
1264 }
1265
/* Horizontal step count for the load/save module: always 1; the context is not read. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1271
1272 static int
1273 pp_load_save_y_steps(void *private_context)
1274 {
1275     struct pp_load_save_context *pp_load_save_context = private_context;
1276
1277     return pp_load_save_context->dest_h / 8;
1278 }
1279
1280 static int
1281 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1282 {
1283     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1284
1285     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1286     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1287     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
1288     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
1289
1290     return 0;
1291 }
1292
1293 static VAStatus
1294 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1295                                 const struct i965_surface *src_surface,
1296                                 const VARectangle *src_rect,
1297                                 struct i965_surface *dst_surface,
1298                                 const VARectangle *dst_rect,
1299                                 void *filter_param)
1300 {
1301     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1302     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1303     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1304     int width[3], height[3], pitch[3], offset[3];
1305     const int Y = 0;
1306
1307     /* source surface */
1308     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
1309                                     width, height, pitch, offset);
1310
1311     /* destination surface */
1312     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
1313                                     width, height, pitch, offset);
1314
1315     /* private function & data */
1316     pp_context->pp_x_steps = pp_load_save_x_steps;
1317     pp_context->pp_y_steps = pp_load_save_y_steps;
1318     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
1319     pp_load_save_context->dest_h = ALIGN(height[Y], 16);
1320     pp_load_save_context->dest_w = ALIGN(width[Y], 16);
1321
1322     pp_inline_parameter->grf5.block_count_x = ALIGN(width[Y], 16) / 16;   /* 1 x N */
1323     pp_inline_parameter->grf5.number_blocks = ALIGN(width[Y], 16) / 16;
1324
1325     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
1326     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
1327
1328     dst_surface->flags = src_surface->flags;
1329
1330     return VA_STATUS_SUCCESS;
1331 }
1332
/* Horizontal step count for the scaling module: always 1; the context is not read. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1338
1339 static int
1340 pp_scaling_y_steps(void *private_context)
1341 {
1342     struct pp_scaling_context *pp_scaling_context = private_context;
1343
1344     return pp_scaling_context->dest_h / 8;
1345 }
1346
1347 static int
1348 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1349 {
1350     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1351     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1352     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1353     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1354     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1355
1356     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
1357     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
1358     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
1359     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
1360     
1361     return 0;
1362 }
1363
1364 static VAStatus
1365 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1366                            const struct i965_surface *src_surface,
1367                            const VARectangle *src_rect,
1368                            struct i965_surface *dst_surface,
1369                            const VARectangle *dst_rect,
1370                            void *filter_param)
1371 {
1372     struct i965_driver_data *i965 = i965_driver_data(ctx);
1373     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1374     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1375     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1376     struct object_surface *obj_surface;
1377     struct i965_sampler_state *sampler_state;
1378     int in_w, in_h, in_wpitch, in_hpitch;
1379     int out_w, out_h, out_wpitch, out_hpitch;
1380
1381     /* source surface */
1382     obj_surface = SURFACE(src_surface->id);
1383     in_w = obj_surface->orig_width;
1384     in_h = obj_surface->orig_height;
1385     in_wpitch = obj_surface->width;
1386     in_hpitch = obj_surface->height;
1387
1388     /* source Y surface index 1 */
1389     i965_pp_set_surface_state(ctx, pp_context,
1390                               obj_surface->bo, 0,
1391                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1392                               1, 0);
1393
1394     /* source UV surface index 2 */
1395     i965_pp_set_surface_state(ctx, pp_context,
1396                               obj_surface->bo, in_wpitch * in_hpitch,
1397                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1398                               2, 0);
1399
1400     /* destination surface */
1401     obj_surface = SURFACE(dst_surface->id);
1402     out_w = obj_surface->orig_width;
1403     out_h = obj_surface->orig_height;
1404     out_wpitch = obj_surface->width;
1405     out_hpitch = obj_surface->height;
1406
1407     /* destination Y surface index 7 */
1408     i965_pp_set_surface_state(ctx, pp_context,
1409                               obj_surface->bo, 0,
1410                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1411                               7, 1);
1412
1413     /* destination UV surface index 8 */
1414     i965_pp_set_surface_state(ctx, pp_context,
1415                               obj_surface->bo, out_wpitch * out_hpitch,
1416                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1417                               8, 1);
1418
1419     /* sampler state */
1420     dri_bo_map(pp_context->sampler_state_table.bo, True);
1421     assert(pp_context->sampler_state_table.bo->virtual);
1422     sampler_state = pp_context->sampler_state_table.bo->virtual;
1423
1424     /* SIMD16 Y index 1 */
1425     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1426     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1427     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1428     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1429     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1430
1431     /* SIMD16 UV index 2 */
1432     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1433     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1434     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1435     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1436     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1437
1438     dri_bo_unmap(pp_context->sampler_state_table.bo);
1439
1440     /* private function & data */
1441     pp_context->pp_x_steps = pp_scaling_x_steps;
1442     pp_context->pp_y_steps = pp_scaling_y_steps;
1443     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1444
1445     pp_scaling_context->dest_x = dst_rect->x;
1446     pp_scaling_context->dest_y = dst_rect->y;
1447     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
1448     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
1449     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w;
1450     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
1451
1452     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1453
1454     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
1455     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1456     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
1457     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1458     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1459
1460     dst_surface->flags = src_surface->flags;
1461
1462     return VA_STATUS_SUCCESS;
1463 }
1464
1465 static int
1466 pp_avs_x_steps(void *private_context)
1467 {
1468     struct pp_avs_context *pp_avs_context = private_context;
1469
1470     return pp_avs_context->dest_w / 16;
1471 }
1472
/* Vertical step count for the AVS module: always 1; the context is not read. */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1478
1479 static int
1480 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1481 {
1482     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1483     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1484     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1485     float src_x_steping, src_y_steping, video_step_delta;
1486     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1487
1488     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
1489         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1490         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
1491     } else if (tmp_w >= pp_avs_context->dest_w) {
1492         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1493         pp_inline_parameter->grf6.video_step_delta = 0;
1494         
1495         if (x == 0) {
1496             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1497                 pp_avs_context->src_normalized_x;
1498         } else {
1499             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1500             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1501             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1502                 16 * 15 * video_step_delta / 2;
1503         }
1504     } else {
1505         int n0, n1, n2, nls_left, nls_right;
1506         int factor_a = 5, factor_b = 4;
1507         float f;
1508
1509         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1510         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1511         n2 = tmp_w / (16 * factor_a);
1512         nls_left = n0 + n2;
1513         nls_right = n1 + n2;
1514         f = (float) n2 * 16 / tmp_w;
1515         
1516         if (n0 < 5) {
1517             pp_inline_parameter->grf6.video_step_delta = 0.0;
1518
1519             if (x == 0) {
1520                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1521                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1522             } else {
1523                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1524                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1525                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1526                     16 * 15 * video_step_delta / 2;
1527             }
1528         } else {
1529             if (x < nls_left) {
1530                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1531                 float a = f / (nls_left * 16 * factor_b);
1532                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1533                 
1534                 pp_inline_parameter->grf6.video_step_delta = b;
1535
1536                 if (x == 0) {
1537                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1538                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
1539                 } else {
1540                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1541                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1542                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1543                         16 * 15 * video_step_delta / 2;
1544                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
1545                 }
1546             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1547                 /* scale the center linearly */
1548                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1549                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1550                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1551                     16 * 15 * video_step_delta / 2;
1552                 pp_inline_parameter->grf6.video_step_delta = 0.0;
1553                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1554             } else {
1555                 float a = f / (nls_right * 16 * factor_b);
1556                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1557
1558                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1559                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1560                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1561                     16 * 15 * video_step_delta / 2;
1562                 pp_inline_parameter->grf6.video_step_delta = -b;
1563
1564                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1565                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1566                 else
1567                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
1568             }
1569         }
1570     }
1571
1572     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1573     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
1574     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1575     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
1576
1577     return 0;
1578 }
1579
1580 static VAStatus
1581 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1582                        const struct i965_surface *src_surface,
1583                        const VARectangle *src_rect,
1584                        struct i965_surface *dst_surface,
1585                        const VARectangle *dst_rect,
1586                        void *filter_param,
1587                        int nlas)
1588 {
1589     struct i965_driver_data *i965 = i965_driver_data(ctx);
1590     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1591     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1592     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1593     struct object_surface *obj_surface;
1594     struct i965_sampler_8x8 *sampler_8x8;
1595     struct i965_sampler_8x8_state *sampler_8x8_state;
1596     int index;
1597     int in_w, in_h, in_wpitch, in_hpitch;
1598     int out_w, out_h, out_wpitch, out_hpitch;
1599     int i;
1600
1601     /* surface */
1602     obj_surface = SURFACE(src_surface->id);
1603     in_w = obj_surface->orig_width;
1604     in_h = obj_surface->orig_height;
1605     in_wpitch = obj_surface->width;
1606     in_hpitch = obj_surface->height;
1607
1608     /* source Y surface index 1 */
1609     i965_pp_set_surface2_state(ctx, pp_context,
1610                                obj_surface->bo, 0,
1611                                in_w, in_h, in_wpitch,
1612                                0, 0,
1613                                SURFACE_FORMAT_Y8_UNORM, 0,
1614                                1);
1615
1616     /* source UV surface index 2 */
1617     i965_pp_set_surface2_state(ctx, pp_context,
1618                                obj_surface->bo, in_wpitch * in_hpitch,
1619                                in_w / 2, in_h / 2, in_wpitch,
1620                                0, 0,
1621                                SURFACE_FORMAT_R8B8_UNORM, 0,
1622                                2);
1623
1624     /* destination surface */
1625     obj_surface = SURFACE(dst_surface->id);
1626     out_w = obj_surface->orig_width;
1627     out_h = obj_surface->orig_height;
1628     out_wpitch = obj_surface->width;
1629     out_hpitch = obj_surface->height;
1630     assert(out_w <= out_wpitch && out_h <= out_hpitch);
1631
1632     /* destination Y surface index 7 */
1633     i965_pp_set_surface_state(ctx, pp_context,
1634                               obj_surface->bo, 0,
1635                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1636                               7, 1);
1637
1638     /* destination UV surface index 8 */
1639     i965_pp_set_surface_state(ctx, pp_context,
1640                               obj_surface->bo, out_wpitch * out_hpitch,
1641                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1642                               8, 1);
1643
1644     /* sampler 8x8 state */
1645     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1646     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1647     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1648     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1649     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1650
1651     for (i = 0; i < 17; i++) {
1652         /* for Y channel, currently ignore */
1653         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
1654         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
1655         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
1656         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
1657         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
1658         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
1659         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
1660         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
1661         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
1662         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
1663         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
1664         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
1665         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
1666         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
1667         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
1668         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
1669         /* for U/V channel, 0.25 */
1670         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
1671         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
1672         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
1673         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
1674         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
1675         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
1676         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
1677         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
1678         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
1679         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
1680         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
1681         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
1682         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
1683         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
1684         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
1685         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
1686     }
1687
1688     sampler_8x8_state->dw136.default_sharpness_level = 0;
1689     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1690     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1691     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1692     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1693
1694     /* sampler 8x8 */
1695     dri_bo_map(pp_context->sampler_state_table.bo, True);
1696     assert(pp_context->sampler_state_table.bo->virtual);
1697     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1698     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1699
1700     /* sample_8x8 Y index 1 */
1701     index = 1;
1702     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1703     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1704     sampler_8x8[index].dw0.ief_bypass = 1;
1705     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1706     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1707     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1708     sampler_8x8[index].dw2.global_noise_estimation = 22;
1709     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1710     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1711     sampler_8x8[index].dw3.strong_edge_weight = 7;
1712     sampler_8x8[index].dw3.regular_weight = 2;
1713     sampler_8x8[index].dw3.non_edge_weight = 0;
1714     sampler_8x8[index].dw3.gain_factor = 40;
1715     sampler_8x8[index].dw4.steepness_boost = 0;
1716     sampler_8x8[index].dw4.steepness_threshold = 0;
1717     sampler_8x8[index].dw4.mr_boost = 0;
1718     sampler_8x8[index].dw4.mr_threshold = 5;
1719     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1720     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1721     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1722     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1723     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1724     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1725     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1726     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1727     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1728     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1729     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1730     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1731     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1732     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1733     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1734     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1735     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1736     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1737     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1738     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1739     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1740     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1741     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1742     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1743     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1744     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1745     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1746     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1747     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1748     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1749     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1750     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1751     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1752     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1753     sampler_8x8[index].dw13.limiter_boost = 0;
1754     sampler_8x8[index].dw13.minimum_limiter = 10;
1755     sampler_8x8[index].dw13.maximum_limiter = 11;
1756     sampler_8x8[index].dw14.clip_limiter = 130;
1757     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1758                       I915_GEM_DOMAIN_RENDER, 
1759                       0,
1760                       0,
1761                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1762                       pp_context->sampler_state_table.bo_8x8);
1763
1764     /* sample_8x8 UV index 2 */
1765     index = 2;
1766     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1767     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1768     sampler_8x8[index].dw0.ief_bypass = 1;
1769     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1770     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1771     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1772     sampler_8x8[index].dw2.global_noise_estimation = 22;
1773     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1774     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1775     sampler_8x8[index].dw3.strong_edge_weight = 7;
1776     sampler_8x8[index].dw3.regular_weight = 2;
1777     sampler_8x8[index].dw3.non_edge_weight = 0;
1778     sampler_8x8[index].dw3.gain_factor = 40;
1779     sampler_8x8[index].dw4.steepness_boost = 0;
1780     sampler_8x8[index].dw4.steepness_threshold = 0;
1781     sampler_8x8[index].dw4.mr_boost = 0;
1782     sampler_8x8[index].dw4.mr_threshold = 5;
1783     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1784     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1785     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1786     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1787     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1788     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1789     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1790     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1791     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1792     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1793     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1794     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1795     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1796     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1797     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1798     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1799     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1800     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1801     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1802     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1803     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1804     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1805     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1806     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1807     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1808     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1809     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1810     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1811     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1812     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1813     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1814     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1815     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1816     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1817     sampler_8x8[index].dw13.limiter_boost = 0;
1818     sampler_8x8[index].dw13.minimum_limiter = 10;
1819     sampler_8x8[index].dw13.maximum_limiter = 11;
1820     sampler_8x8[index].dw14.clip_limiter = 130;
1821     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1822                       I915_GEM_DOMAIN_RENDER, 
1823                       0,
1824                       0,
1825                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1826                       pp_context->sampler_state_table.bo_8x8);
1827
1828     dri_bo_unmap(pp_context->sampler_state_table.bo);
1829
1830     /* private function & data */
1831     pp_context->pp_x_steps = pp_avs_x_steps;
1832     pp_context->pp_y_steps = pp_avs_y_steps;
1833     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1834
1835     pp_avs_context->dest_x = dst_rect->x;
1836     pp_avs_context->dest_y = dst_rect->y;
1837     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
1838     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
1839     pp_avs_context->src_normalized_x = (float)src_rect->x / in_w;
1840     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
1841     pp_avs_context->src_w = src_rect->width;
1842     pp_avs_context->src_h = src_rect->height;
1843
1844     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
1845     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1846
1847     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
1848     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
1849     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
1850     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1851     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1852     pp_inline_parameter->grf6.video_step_delta = 0.0;
1853
1854     dst_surface->flags = src_surface->flags;
1855
1856     return VA_STATUS_SUCCESS;
1857 }
1858
1859 static VAStatus
1860 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1861                             const struct i965_surface *src_surface,
1862                             const VARectangle *src_rect,
1863                             struct i965_surface *dst_surface,
1864                             const VARectangle *dst_rect,
1865                             void *filter_param)
1866 {
1867     return pp_nv12_avs_initialize(ctx, pp_context,
1868                                   src_surface,
1869                                   src_rect,
1870                                   dst_surface,
1871                                   dst_rect,
1872                                   filter_param,
1873                                   1);
1874 }
1875
1876 static VAStatus
1877 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1878                              const struct i965_surface *src_surface,
1879                              const VARectangle *src_rect,
1880                              struct i965_surface *dst_surface,
1881                              const VARectangle *dst_rect,
1882                              void *filter_param)
1883 {
1884     return pp_nv12_avs_initialize(ctx, pp_context,
1885                                   src_surface,
1886                                   src_rect,
1887                                   dst_surface,
1888                                   dst_rect,
1889                                   filter_param,
1890                                   0);    
1891 }
1892
1893 static int
1894 gen7_pp_avs_x_steps(void *private_context)
1895 {
1896     struct pp_avs_context *pp_avs_context = private_context;
1897
1898     return pp_avs_context->dest_w / 16;
1899 }
1900
1901 static int
1902 gen7_pp_avs_y_steps(void *private_context)
1903 {
1904     struct pp_avs_context *pp_avs_context = private_context;
1905
1906     return pp_avs_context->dest_h / 16;
1907 }
1908
1909 static int
1910 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1911 {
1912     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1913     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1914
1915     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1916     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
1917     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
1918     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
1919
1920     return 0;
1921 }
1922
1923 static VAStatus
1924 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1925                            const struct i965_surface *src_surface,
1926                            const VARectangle *src_rect,
1927                            struct i965_surface *dst_surface,
1928                            const VARectangle *dst_rect,
1929                            void *filter_param)
1930 {
1931     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1932     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1933     struct gen7_sampler_8x8 *sampler_8x8;
1934     struct i965_sampler_8x8_state *sampler_8x8_state;
1935     int index, i;
1936     int width[3], height[3], pitch[3], offset[3];
1937
1938     /* source surface */
1939     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
1940                                          width, height, pitch, offset);
1941
1942     /* destination surface */
1943     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
1944                                          width, height, pitch, offset);
1945
1946     /* sampler 8x8 state */
1947     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1948     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1949     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1950     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1951     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1952
1953     for (i = 0; i < 17; i++) {
1954         /* for Y channel, currently ignore */
1955         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
1956         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
1957         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
1958         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
1959         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
1960         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
1961         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
1962         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
1963         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
1964         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
1965         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
1966         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
1967         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
1968         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
1969         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
1970         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
1971         /* for U/V channel, 0.25 */
1972         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
1973         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
1974         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
1975         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
1976         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
1977         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
1978         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
1979         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
1980         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
1981         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
1982         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
1983         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
1984         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
1985         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
1986         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
1987         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
1988     }
1989
1990     sampler_8x8_state->dw136.default_sharpness_level = 0;
1991     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1992     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1993     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1994     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1995
1996     /* sampler 8x8 */
1997     dri_bo_map(pp_context->sampler_state_table.bo, True);
1998     assert(pp_context->sampler_state_table.bo->virtual);
1999     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2000     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2001
2002     /* sample_8x8 Y index 4 */
2003     index = 4;
2004     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2005     sampler_8x8[index].dw0.global_noise_estimation = 255;
2006     sampler_8x8[index].dw0.ief_bypass = 1;
2007
2008     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2009
2010     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2011     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2012     sampler_8x8[index].dw2.r5x_coefficient = 9;
2013     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2014     sampler_8x8[index].dw2.r5c_coefficient = 3;
2015
2016     sampler_8x8[index].dw3.r3x_coefficient = 27;
2017     sampler_8x8[index].dw3.r3c_coefficient = 5;
2018     sampler_8x8[index].dw3.gain_factor = 40;
2019     sampler_8x8[index].dw3.non_edge_weight = 1;
2020     sampler_8x8[index].dw3.regular_weight = 2;
2021     sampler_8x8[index].dw3.strong_edge_weight = 7;
2022     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2023
2024     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2025                       I915_GEM_DOMAIN_RENDER, 
2026                       0,
2027                       0,
2028                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2029                       pp_context->sampler_state_table.bo_8x8);
2030
2031     /* sample_8x8 UV index 8 */
2032     index = 8;
2033     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2034     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2035     sampler_8x8[index].dw0.global_noise_estimation = 255;
2036     sampler_8x8[index].dw0.ief_bypass = 1;
2037     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2038     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2039     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2040     sampler_8x8[index].dw2.r5x_coefficient = 9;
2041     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2042     sampler_8x8[index].dw2.r5c_coefficient = 3;
2043     sampler_8x8[index].dw3.r3x_coefficient = 27;
2044     sampler_8x8[index].dw3.r3c_coefficient = 5;
2045     sampler_8x8[index].dw3.gain_factor = 40;
2046     sampler_8x8[index].dw3.non_edge_weight = 1;
2047     sampler_8x8[index].dw3.regular_weight = 2;
2048     sampler_8x8[index].dw3.strong_edge_weight = 7;
2049     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2050
2051     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2052                       I915_GEM_DOMAIN_RENDER, 
2053                       0,
2054                       0,
2055                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2056                       pp_context->sampler_state_table.bo_8x8);
2057
2058     /* sampler_8x8 V, index 12 */
2059     index = 12;
2060     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2061     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2062     sampler_8x8[index].dw0.global_noise_estimation = 255;
2063     sampler_8x8[index].dw0.ief_bypass = 1;
2064     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2065     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2066     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2067     sampler_8x8[index].dw2.r5x_coefficient = 9;
2068     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2069     sampler_8x8[index].dw2.r5c_coefficient = 3;
2070     sampler_8x8[index].dw3.r3x_coefficient = 27;
2071     sampler_8x8[index].dw3.r3c_coefficient = 5;
2072     sampler_8x8[index].dw3.gain_factor = 40;
2073     sampler_8x8[index].dw3.non_edge_weight = 1;
2074     sampler_8x8[index].dw3.regular_weight = 2;
2075     sampler_8x8[index].dw3.strong_edge_weight = 7;
2076     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2077
2078     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2079                       I915_GEM_DOMAIN_RENDER, 
2080                       0,
2081                       0,
2082                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2083                       pp_context->sampler_state_table.bo_8x8);
2084
2085     dri_bo_unmap(pp_context->sampler_state_table.bo);
2086
2087     /* private function & data */
2088     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2089     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2090     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2091
2092     pp_avs_context->dest_x = dst_rect->x;
2093     pp_avs_context->dest_y = dst_rect->y;
2094     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2095     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2096     pp_avs_context->src_w = src_rect->width;
2097     pp_avs_context->src_h = src_rect->height;
2098
2099     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2100     dw = MAX(dw, pp_avs_context->dest_w);
2101
2102     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2103     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2104     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) 1.0 / pp_avs_context->dest_h;
2105     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2106     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
2107
2108     dst_surface->flags = src_surface->flags;
2109
2110     return VA_STATUS_SUCCESS;
2111 }
2112
/*
 * Horizontal step count for the DNDI pass: always 1.  The private context
 * is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2118
2119 static int
2120 pp_dndi_y_steps(void *private_context)
2121 {
2122     struct pp_dndi_context *pp_dndi_context = private_context;
2123
2124     return pp_dndi_context->dest_h / 4;
2125 }
2126
2127 static int
2128 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2129 {
2130     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2131
2132     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2133     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2134
2135     return 0;
2136 }
2137
2138 static VAStatus
2139 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2140                         const struct i965_surface *src_surface,
2141                         const VARectangle *src_rect,
2142                         struct i965_surface *dst_surface,
2143                         const VARectangle *dst_rect,
2144                         void *filter_param)
2145 {
2146     struct i965_driver_data *i965 = i965_driver_data(ctx);
2147     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2148     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2149     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2150     struct object_surface *obj_surface;
2151     struct i965_sampler_dndi *sampler_dndi;
2152     int index;
2153     int w, h;
2154     int orig_w, orig_h;
2155     int dndi_top_first = 1;
2156
2157     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2158         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2159
2160     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2161         dndi_top_first = 1;
2162     else
2163         dndi_top_first = 0;
2164
2165     /* surface */
2166     obj_surface = SURFACE(src_surface->id);
2167     orig_w = obj_surface->orig_width;
2168     orig_h = obj_surface->orig_height;
2169     w = obj_surface->width;
2170     h = obj_surface->height;
2171
2172     if (pp_context->stmm.bo == NULL) {
2173         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2174                                            "STMM surface",
2175                                            w * h,
2176                                            4096);
2177         assert(pp_context->stmm.bo);
2178     }
2179
2180     /* source UV surface index 2 */
2181     i965_pp_set_surface_state(ctx, pp_context,
2182                               obj_surface->bo, w * h,
2183                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2184                               2, 0);
2185
2186     /* source YUV surface index 4 */
2187     i965_pp_set_surface2_state(ctx, pp_context,
2188                                obj_surface->bo, 0,
2189                                orig_w, orig_h, w,
2190                                0, h,
2191                                SURFACE_FORMAT_PLANAR_420_8, 1,
2192                                4);
2193
2194     /* source STMM surface index 20 */
2195     i965_pp_set_surface_state(ctx, pp_context,
2196                               pp_context->stmm.bo, 0,
2197                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2198                               20, 1);
2199
2200     /* destination surface */
2201     obj_surface = SURFACE(dst_surface->id);
2202     orig_w = obj_surface->orig_width;
2203     orig_h = obj_surface->orig_height;
2204     w = obj_surface->width;
2205     h = obj_surface->height;
2206
2207     /* destination Y surface index 7 */
2208     i965_pp_set_surface_state(ctx, pp_context,
2209                               obj_surface->bo, 0,
2210                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2211                               7, 1);
2212
2213     /* destination UV surface index 8 */
2214     i965_pp_set_surface_state(ctx, pp_context,
2215                               obj_surface->bo, w * h,
2216                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2217                               8, 1);
2218     /* sampler dndi */
2219     dri_bo_map(pp_context->sampler_state_table.bo, True);
2220     assert(pp_context->sampler_state_table.bo->virtual);
2221     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2222     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2223
2224     /* sample dndi index 1 */
2225     index = 0;
2226     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2227     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2228     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2229     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2230
2231     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2232     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
2233     sampler_dndi[index].dw1.stmm_c2 = 1;
2234     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2235     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2236
2237     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2238     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2239     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2240     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
2241
2242     sampler_dndi[index].dw3.maximum_stmm = 128;
2243     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2244     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2245     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2246     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2247
2248     sampler_dndi[index].dw4.sdi_delta = 8;
2249     sampler_dndi[index].dw4.sdi_threshold = 128;
2250     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2251     sampler_dndi[index].dw4.stmm_shift_up = 0;
2252     sampler_dndi[index].dw4.stmm_shift_down = 0;
2253     sampler_dndi[index].dw4.minimum_stmm = 0;
2254
2255     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
2256     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
2257     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
2258     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
2259
2260     sampler_dndi[index].dw6.dn_enable = 1;
2261     sampler_dndi[index].dw6.di_enable = 1;
2262     sampler_dndi[index].dw6.di_partial = 0;
2263     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2264     sampler_dndi[index].dw6.dndi_stream_id = 0;
2265     sampler_dndi[index].dw6.dndi_first_frame = 1;
2266     sampler_dndi[index].dw6.progressive_dn = 0;
2267     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
2268     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2269     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2270
2271     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2272     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2273     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2274     sampler_dndi[index].dw7.column_width_minus1 = 0;
2275
2276     dri_bo_unmap(pp_context->sampler_state_table.bo);
2277
2278     /* private function & data */
2279     pp_context->pp_x_steps = pp_dndi_x_steps;
2280     pp_context->pp_y_steps = pp_dndi_y_steps;
2281     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
2282
2283     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2284     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
2285     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
2286     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
2287
2288     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2289     pp_inline_parameter->grf5.number_blocks = w / 16;
2290     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2291     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2292
2293     pp_dndi_context->dest_w = w;
2294     pp_dndi_context->dest_h = h;
2295
2296     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2297
2298     return VA_STATUS_SUCCESS;
2299 }
2300
/*
 * Horizontal step count for the DN walker.
 *
 * The private context is not consulted; the function always reports a
 * single step.
 */
static int
pp_dn_x_steps(void *private_context)
{
    const int x_steps = 1;

    (void)private_context;

    return x_steps;
}
2306
2307 static int
2308 pp_dn_y_steps(void *private_context)
2309 {
2310     struct pp_dn_context *pp_dn_context = private_context;
2311
2312     return pp_dn_context->dest_h / 8;
2313 }
2314
2315 static int
2316 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2317 {
2318     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2319
2320     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2321     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
2322
2323     return 0;
2324 }
2325
2326 static VAStatus
2327 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2328                       const struct i965_surface *src_surface,
2329                       const VARectangle *src_rect,
2330                       struct i965_surface *dst_surface,
2331                       const VARectangle *dst_rect,
2332                       void *filter_param)
2333 {
2334     struct i965_driver_data *i965 = i965_driver_data(ctx);
2335     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2336     struct object_surface *obj_surface;
2337     struct i965_sampler_dndi *sampler_dndi;
2338     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2339     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2340     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2341     int index;
2342     int w, h;
2343     int orig_w, orig_h;
2344     int dn_strength = 15;
2345     int dndi_top_first = 1;
2346     int dn_progressive = 0;
2347
2348     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2349         dndi_top_first = 1;
2350         dn_progressive = 1;
2351     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2352         dndi_top_first = 1;
2353         dn_progressive = 0;
2354     } else {
2355         dndi_top_first = 0;
2356         dn_progressive = 0;
2357     }
2358
2359     if (dn_filter_param) {
2360         float value = dn_filter_param->value;
2361         
2362         if (value > 1.0)
2363             value = 1.0;
2364         
2365         if (value < 0.0)
2366             value = 0.0;
2367
2368         dn_strength = (int)(value * 31.0F);
2369     }
2370
2371     /* surface */
2372     obj_surface = SURFACE(src_surface->id);
2373     orig_w = obj_surface->orig_width;
2374     orig_h = obj_surface->orig_height;
2375     w = obj_surface->width;
2376     h = obj_surface->height;
2377
2378     if (pp_context->stmm.bo == NULL) {
2379         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2380                                            "STMM surface",
2381                                            w * h,
2382                                            4096);
2383         assert(pp_context->stmm.bo);
2384     }
2385
2386     /* source UV surface index 2 */
2387     i965_pp_set_surface_state(ctx, pp_context,
2388                               obj_surface->bo, w * h,
2389                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2390                               2, 0);
2391
2392     /* source YUV surface index 4 */
2393     i965_pp_set_surface2_state(ctx, pp_context,
2394                                obj_surface->bo, 0,
2395                                orig_w, orig_h, w,
2396                                0, h,
2397                                SURFACE_FORMAT_PLANAR_420_8, 1,
2398                                4);
2399
2400     /* source STMM surface index 20 */
2401     i965_pp_set_surface_state(ctx, pp_context,
2402                               pp_context->stmm.bo, 0,
2403                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2404                               20, 1);
2405
2406     /* destination surface */
2407     obj_surface = SURFACE(dst_surface->id);
2408     orig_w = obj_surface->orig_width;
2409     orig_h = obj_surface->orig_height;
2410     w = obj_surface->width;
2411     h = obj_surface->height;
2412
2413     /* destination Y surface index 7 */
2414     i965_pp_set_surface_state(ctx, pp_context,
2415                               obj_surface->bo, 0,
2416                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2417                               7, 1);
2418
2419     /* destination UV surface index 8 */
2420     i965_pp_set_surface_state(ctx, pp_context,
2421                               obj_surface->bo, w * h,
2422                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2423                               8, 1);
2424     /* sampler dn */
2425     dri_bo_map(pp_context->sampler_state_table.bo, True);
2426     assert(pp_context->sampler_state_table.bo->virtual);
2427     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2428     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2429
2430     /* sample dndi index 1 */
2431     index = 0;
2432     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2433     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2434     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2435     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2436
2437     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2438     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2439     sampler_dndi[index].dw1.stmm_c2 = 0;
2440     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2441     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2442
2443     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2444     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2445     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2446     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
2447
2448     sampler_dndi[index].dw3.maximum_stmm = 128;
2449     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2450     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2451     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2452     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2453
2454     sampler_dndi[index].dw4.sdi_delta = 8;
2455     sampler_dndi[index].dw4.sdi_threshold = 128;
2456     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2457     sampler_dndi[index].dw4.stmm_shift_up = 0;
2458     sampler_dndi[index].dw4.stmm_shift_down = 0;
2459     sampler_dndi[index].dw4.minimum_stmm = 0;
2460
2461     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2462     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2463     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2464     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2465
2466     sampler_dndi[index].dw6.dn_enable = 1;
2467     sampler_dndi[index].dw6.di_enable = 0;
2468     sampler_dndi[index].dw6.di_partial = 0;
2469     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2470     sampler_dndi[index].dw6.dndi_stream_id = 1;
2471     sampler_dndi[index].dw6.dndi_first_frame = 1;
2472     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
2473     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2474     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2475     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2476
2477     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2478     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2479     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2480     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2481
2482     dri_bo_unmap(pp_context->sampler_state_table.bo);
2483
2484     /* private function & data */
2485     pp_context->pp_x_steps = pp_dn_x_steps;
2486     pp_context->pp_y_steps = pp_dn_y_steps;
2487     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
2488
2489     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2490     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
2491     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
2492     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
2493
2494     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2495     pp_inline_parameter->grf5.number_blocks = w / 16;
2496     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2497     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2498
2499     pp_dn_context->dest_w = w;
2500     pp_dn_context->dest_h = h;
2501
2502     dst_surface->flags = src_surface->flags;
2503     
2504     return VA_STATUS_SUCCESS;
2505 }
2506
2507 static int
2508 gen7_pp_dndi_x_steps(void *private_context)
2509 {
2510     struct pp_dndi_context *pp_dndi_context = private_context;
2511
2512     return pp_dndi_context->dest_w / 16;
2513 }
2514
2515 static int
2516 gen7_pp_dndi_y_steps(void *private_context)
2517 {
2518     struct pp_dndi_context *pp_dndi_context = private_context;
2519
2520     return pp_dndi_context->dest_h / 4;
2521 }
2522
2523 static int
2524 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2525 {
2526     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2527
2528     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
2529     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
2530
2531     return 0;
2532 }
2533
2534 static VAStatus
2535 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2536                              const struct i965_surface *src_surface,
2537                              const VARectangle *src_rect,
2538                              struct i965_surface *dst_surface,
2539                              const VARectangle *dst_rect,
2540                              void *filter_param)
2541 {
2542     struct i965_driver_data *i965 = i965_driver_data(ctx);
2543     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2544     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2545     struct object_surface *obj_surface;
2546     struct gen7_sampler_dndi *sampler_dndi;
2547     int index;
2548     int w, h;
2549     int orig_w, orig_h;
2550     int dndi_top_first = 1;
2551
2552     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2553         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2554
2555     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2556         dndi_top_first = 1;
2557     else
2558         dndi_top_first = 0;
2559
2560     /* surface */
2561     obj_surface = SURFACE(src_surface->id);
2562     orig_w = obj_surface->orig_width;
2563     orig_h = obj_surface->orig_height;
2564     w = obj_surface->width;
2565     h = obj_surface->height;
2566
2567     if (pp_context->stmm.bo == NULL) {
2568         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2569                                            "STMM surface",
2570                                            w * h,
2571                                            4096);
2572         assert(pp_context->stmm.bo);
2573     }
2574
2575     /* source UV surface index 1 */
2576     gen7_pp_set_surface_state(ctx, pp_context,
2577                               obj_surface->bo, w * h,
2578                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2579                               1, 0);
2580
2581     /* source YUV surface index 3 */
2582     gen7_pp_set_surface2_state(ctx, pp_context,
2583                                obj_surface->bo, 0,
2584                                orig_w, orig_h, w,
2585                                0, h,
2586                                SURFACE_FORMAT_PLANAR_420_8, 1,
2587                                3);
2588
2589     /* source (temporal reference) YUV surface index 4 */
2590     gen7_pp_set_surface2_state(ctx, pp_context,
2591                                obj_surface->bo, 0,
2592                                orig_w, orig_h, w,
2593                                0, h,
2594                                SURFACE_FORMAT_PLANAR_420_8, 1,
2595                                4);
2596
2597     /* STMM / History Statistics input surface, index 5 */
2598     gen7_pp_set_surface_state(ctx, pp_context,
2599                               pp_context->stmm.bo, 0,
2600                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2601                               5, 1);
2602
2603     /* destination surface */
2604     obj_surface = SURFACE(dst_surface->id);
2605     orig_w = obj_surface->orig_width;
2606     orig_h = obj_surface->orig_height;
2607     w = obj_surface->width;
2608     h = obj_surface->height;
2609
2610     /* destination(Previous frame) Y surface index 27 */
2611     gen7_pp_set_surface_state(ctx, pp_context,
2612                               obj_surface->bo, 0,
2613                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2614                               27, 1);
2615
2616     /* destination(Previous frame) UV surface index 28 */
2617     gen7_pp_set_surface_state(ctx, pp_context,
2618                               obj_surface->bo, w * h,
2619                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2620                               28, 1);
2621
2622     /* destination(Current frame) Y surface index 30 */
2623     gen7_pp_set_surface_state(ctx, pp_context,
2624                               obj_surface->bo, 0,
2625                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2626                               30, 1);
2627
2628     /* destination(Current frame) UV surface index 31 */
2629     gen7_pp_set_surface_state(ctx, pp_context,
2630                               obj_surface->bo, w * h,
2631                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2632                               31, 1);
2633
2634     /* STMM output surface, index 33 */
2635     gen7_pp_set_surface_state(ctx, pp_context,
2636                               pp_context->stmm.bo, 0,
2637                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2638                               33, 1);
2639
2640
2641     /* sampler dndi */
2642     dri_bo_map(pp_context->sampler_state_table.bo, True);
2643     assert(pp_context->sampler_state_table.bo->virtual);
2644     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2645     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2646
2647     /* sample dndi index 0 */
2648     index = 0;
2649     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2650     sampler_dndi[index].dw0.dnmh_delt = 8;
2651     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
2652     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
2653     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2654     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2655
2656     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2657     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2658     sampler_dndi[index].dw1.stmm_c2 = 0;
2659     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2660     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2661
2662     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2663     sampler_dndi[index].dw2.bne_edge_th = 1;
2664     sampler_dndi[index].dw2.smooth_mv_th = 0;
2665     sampler_dndi[index].dw2.sad_tight_th = 5;
2666     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
2667     sampler_dndi[index].dw2.good_neighbor_th = 4;
2668
2669     sampler_dndi[index].dw3.maximum_stmm = 128;
2670     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2671     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2672     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2673     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2674
2675     sampler_dndi[index].dw4.sdi_delta = 8;
2676     sampler_dndi[index].dw4.sdi_threshold = 128;
2677     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2678     sampler_dndi[index].dw4.stmm_shift_up = 0;
2679     sampler_dndi[index].dw4.stmm_shift_down = 0;
2680     sampler_dndi[index].dw4.minimum_stmm = 0;
2681
2682     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2683     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2684     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2685     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2686
2687     sampler_dndi[index].dw6.dn_enable = 0;
2688     sampler_dndi[index].dw6.di_enable = 1;
2689     sampler_dndi[index].dw6.di_partial = 0;
2690     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2691     sampler_dndi[index].dw6.dndi_stream_id = 1;
2692     sampler_dndi[index].dw6.dndi_first_frame = 1;
2693     sampler_dndi[index].dw6.progressive_dn = 0;
2694     sampler_dndi[index].dw6.mcdi_enable = 0;
2695     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2696     sampler_dndi[index].dw6.cat_th1 = 0;
2697     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2698     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2699
2700     sampler_dndi[index].dw7.sad_tha = 5;
2701     sampler_dndi[index].dw7.sad_thb = 10;
2702     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2703     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
2704     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2705     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2706     sampler_dndi[index].dw7.neighborpixel_th = 10;
2707     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2708
2709     dri_bo_unmap(pp_context->sampler_state_table.bo);
2710
2711     /* private function & data */
2712     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
2713     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
2714     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
2715
2716     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
2717     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
2718     pp_static_parameter->grf1.di_top_field_first = 0;
2719     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2720
2721     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2722     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2723     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2724
2725     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
2726     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
2727
2728     pp_dndi_context->dest_w = w;
2729     pp_dndi_context->dest_h = h;
2730
2731     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2732
2733     return VA_STATUS_SUCCESS;
2734 }
2735
/*
 * Horizontal step count for the gen7 DN walker.
 *
 * The private context is not consulted; the function always reports a
 * single step.
 */
static int
gen7_pp_dn_x_steps(void *private_context)
{
    const int x_steps = 1;

    (void)private_context;

    return x_steps;
}
2741
2742 static int
2743 gen7_pp_dn_y_steps(void *private_context)
2744 {
2745     struct pp_dn_context *pp_dn_context = private_context;
2746
2747     return pp_dn_context->dest_h / 4;
2748 }
2749
2750 static int
2751 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2752 {
2753     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2754
2755     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2756     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2757
2758     return 0;
2759 }
2760
2761 static VAStatus
2762 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2763                            const struct i965_surface *src_surface,
2764                            const VARectangle *src_rect,
2765                            struct i965_surface *dst_surface,
2766                            const VARectangle *dst_rect,
2767                            void *filter_param)
2768 {
2769     struct i965_driver_data *i965 = i965_driver_data(ctx);
2770     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2771     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2772     struct object_surface *obj_surface;
2773     struct gen7_sampler_dndi *sampler_dn;
2774     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2775     int index;
2776     int w, h;
2777     int orig_w, orig_h;
2778     int dn_strength = 15;
2779     int dndi_top_first = 1;
2780     int dn_progressive = 0;
2781
2782     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2783         dndi_top_first = 1;
2784         dn_progressive = 1;
2785     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2786         dndi_top_first = 1;
2787         dn_progressive = 0;
2788     } else {
2789         dndi_top_first = 0;
2790         dn_progressive = 0;
2791     }
2792
2793     if (dn_filter_param) {
2794         float value = dn_filter_param->value;
2795         
2796         if (value > 1.0)
2797             value = 1.0;
2798         
2799         if (value < 0.0)
2800             value = 0.0;
2801
2802         dn_strength = (int)(value * 31.0F);
2803     }
2804
2805     /* surface */
2806     obj_surface = SURFACE(src_surface->id);
2807     orig_w = obj_surface->orig_width;
2808     orig_h = obj_surface->orig_height;
2809     w = obj_surface->width;
2810     h = obj_surface->height;
2811
2812     if (pp_context->stmm.bo == NULL) {
2813         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2814                                            "STMM surface",
2815                                            w * h,
2816                                            4096);
2817         assert(pp_context->stmm.bo);
2818     }
2819
2820     /* source UV surface index 1 */
2821     gen7_pp_set_surface_state(ctx, pp_context,
2822                               obj_surface->bo, w * h,
2823                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2824                               1, 0);
2825
2826     /* source YUV surface index 3 */
2827     gen7_pp_set_surface2_state(ctx, pp_context,
2828                                obj_surface->bo, 0,
2829                                orig_w, orig_h, w,
2830                                0, h,
2831                                SURFACE_FORMAT_PLANAR_420_8, 1,
2832                                3);
2833
2834     /* source STMM surface index 5 */
2835     gen7_pp_set_surface_state(ctx, pp_context,
2836                               pp_context->stmm.bo, 0,
2837                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2838                               5, 1);
2839
2840     /* destination surface */
2841     obj_surface = SURFACE(dst_surface->id);
2842     orig_w = obj_surface->orig_width;
2843     orig_h = obj_surface->orig_height;
2844     w = obj_surface->width;
2845     h = obj_surface->height;
2846
2847     /* destination Y surface index 7 */
2848     gen7_pp_set_surface_state(ctx, pp_context,
2849                               obj_surface->bo, 0,
2850                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2851                               7, 1);
2852
2853     /* destination UV surface index 8 */
2854     gen7_pp_set_surface_state(ctx, pp_context,
2855                               obj_surface->bo, w * h,
2856                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2857                               8, 1);
2858     /* sampler dn */
2859     dri_bo_map(pp_context->sampler_state_table.bo, True);
2860     assert(pp_context->sampler_state_table.bo->virtual);
2861     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
2862     sampler_dn = pp_context->sampler_state_table.bo->virtual;
2863
2864     /* sample dn index 1 */
2865     index = 0;
2866     sampler_dn[index].dw0.denoise_asd_threshold = 0;
2867     sampler_dn[index].dw0.dnmh_delt = 8;
2868     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
2869     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
2870     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
2871     sampler_dn[index].dw0.denoise_stad_threshold = 0;
2872
2873     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2874     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
2875     sampler_dn[index].dw1.stmm_c2 = 0;
2876     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
2877     sampler_dn[index].dw1.temporal_difference_threshold = 16;
2878
2879     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2880     sampler_dn[index].dw2.bne_edge_th = 1;
2881     sampler_dn[index].dw2.smooth_mv_th = 0;
2882     sampler_dn[index].dw2.sad_tight_th = 5;
2883     sampler_dn[index].dw2.cat_slope_minus1 = 9;
2884     sampler_dn[index].dw2.good_neighbor_th = 4;
2885
2886     sampler_dn[index].dw3.maximum_stmm = 128;
2887     sampler_dn[index].dw3.multipler_for_vecm = 2;
2888     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2889     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2890     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
2891
2892     sampler_dn[index].dw4.sdi_delta = 8;
2893     sampler_dn[index].dw4.sdi_threshold = 128;
2894     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2895     sampler_dn[index].dw4.stmm_shift_up = 0;
2896     sampler_dn[index].dw4.stmm_shift_down = 0;
2897     sampler_dn[index].dw4.minimum_stmm = 0;
2898
2899     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
2900     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
2901     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2902     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2903
2904     sampler_dn[index].dw6.dn_enable = 1;
2905     sampler_dn[index].dw6.di_enable = 0;
2906     sampler_dn[index].dw6.di_partial = 0;
2907     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
2908     sampler_dn[index].dw6.dndi_stream_id = 1;
2909     sampler_dn[index].dw6.dndi_first_frame = 1;
2910     sampler_dn[index].dw6.progressive_dn = dn_progressive;
2911     sampler_dn[index].dw6.mcdi_enable = 0;
2912     sampler_dn[index].dw6.fmd_tear_threshold = 32;
2913     sampler_dn[index].dw6.cat_th1 = 0;
2914     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
2915     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
2916
2917     sampler_dn[index].dw7.sad_tha = 5;
2918     sampler_dn[index].dw7.sad_thb = 10;
2919     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2920     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
2921     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2922     sampler_dn[index].dw7.vdi_walker_enable = 0;
2923     sampler_dn[index].dw7.neighborpixel_th = 10;
2924     sampler_dn[index].dw7.column_width_minus1 = w / 16;
2925
2926     dri_bo_unmap(pp_context->sampler_state_table.bo);
2927
2928     /* private function & data */
2929     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
2930     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
2931     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
2932
2933     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
2934     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
2935     pp_static_parameter->grf1.di_top_field_first = 0;
2936     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2937
2938     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2939     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2940     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2941
2942     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
2943     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
2944
2945     pp_dn_context->dest_w = w;
2946     pp_dn_context->dest_h = h;
2947
2948     dst_surface->flags = src_surface->flags;
2949
2950     return VA_STATUS_SUCCESS;
2951 }
2952
2953 static VAStatus
2954 ironlake_pp_initialize(
2955     VADriverContextP   ctx,
2956     struct i965_post_processing_context *pp_context,
2957     const struct i965_surface *src_surface,
2958     const VARectangle *src_rect,
2959     struct i965_surface *dst_surface,
2960     const VARectangle *dst_rect,
2961     int                pp_index,
2962     void *filter_param
2963 )
2964 {
2965     VAStatus va_status;
2966     struct i965_driver_data *i965 = i965_driver_data(ctx);
2967     struct pp_module *pp_module;
2968     dri_bo *bo;
2969     int static_param_size, inline_param_size;
2970
2971     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
2972     bo = dri_bo_alloc(i965->intel.bufmgr,
2973                       "surface state & binding table",
2974                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
2975                       4096);
2976     assert(bo);
2977     pp_context->surface_state_binding_table.bo = bo;
2978
2979     dri_bo_unreference(pp_context->curbe.bo);
2980     bo = dri_bo_alloc(i965->intel.bufmgr,
2981                       "constant buffer",
2982                       4096, 
2983                       4096);
2984     assert(bo);
2985     pp_context->curbe.bo = bo;
2986
2987     dri_bo_unreference(pp_context->idrt.bo);
2988     bo = dri_bo_alloc(i965->intel.bufmgr, 
2989                       "interface discriptor", 
2990                       sizeof(struct i965_interface_descriptor), 
2991                       4096);
2992     assert(bo);
2993     pp_context->idrt.bo = bo;
2994     pp_context->idrt.num_interface_descriptors = 0;
2995
2996     dri_bo_unreference(pp_context->sampler_state_table.bo);
2997     bo = dri_bo_alloc(i965->intel.bufmgr, 
2998                       "sampler state table", 
2999                       4096,
3000                       4096);
3001     assert(bo);
3002     dri_bo_map(bo, True);
3003     memset(bo->virtual, 0, bo->size);
3004     dri_bo_unmap(bo);
3005     pp_context->sampler_state_table.bo = bo;
3006
3007     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3008     bo = dri_bo_alloc(i965->intel.bufmgr, 
3009                       "sampler 8x8 state ",
3010                       4096,
3011                       4096);
3012     assert(bo);
3013     pp_context->sampler_state_table.bo_8x8 = bo;
3014
3015     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3016     bo = dri_bo_alloc(i965->intel.bufmgr, 
3017                       "sampler 8x8 state ",
3018                       4096,
3019                       4096);
3020     assert(bo);
3021     pp_context->sampler_state_table.bo_8x8_uv = bo;
3022
3023     dri_bo_unreference(pp_context->vfe_state.bo);
3024     bo = dri_bo_alloc(i965->intel.bufmgr, 
3025                       "vfe state", 
3026                       sizeof(struct i965_vfe_state), 
3027                       4096);
3028     assert(bo);
3029     pp_context->vfe_state.bo = bo;
3030
3031     if (IS_GEN7(i965->intel.device_id)) {
3032         static_param_size = sizeof(struct gen7_pp_static_parameter);
3033         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
3034     } else {
3035         static_param_size = sizeof(struct pp_static_parameter);
3036         inline_param_size = sizeof(struct pp_inline_parameter);
3037     }
3038
3039     memset(pp_context->pp_static_parameter, 0, static_param_size);
3040     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3041     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3042     pp_context->current_pp = pp_index;
3043     pp_module = &pp_context->pp_modules[pp_index];
3044     
3045     if (pp_module->initialize)
3046         va_status = pp_module->initialize(ctx, pp_context,
3047                                           src_surface,
3048                                           src_rect,
3049                                           dst_surface,
3050                                           dst_rect,
3051                                           filter_param);
3052     else
3053         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3054
3055     return va_status;
3056 }
3057
3058 static VAStatus
3059 ironlake_post_processing(
3060     VADriverContextP   ctx,
3061     struct i965_post_processing_context *pp_context,
3062     const struct i965_surface *src_surface,
3063     const VARectangle *src_rect,
3064     struct i965_surface *dst_surface,
3065     const VARectangle *dst_rect,
3066     int                pp_index,
3067     void *filter_param
3068 )
3069 {
3070     VAStatus va_status;
3071
3072     va_status = ironlake_pp_initialize(ctx, pp_context,
3073                                        src_surface,
3074                                        src_rect,
3075                                        dst_surface,
3076                                        dst_rect,
3077                                        pp_index,
3078                                        filter_param);
3079
3080     if (va_status == VA_STATUS_SUCCESS) {
3081         ironlake_pp_states_setup(ctx, pp_context);
3082         ironlake_pp_pipeline_setup(ctx, pp_context);
3083     }
3084
3085     return va_status;
3086 }
3087
3088 static VAStatus
3089 gen6_pp_initialize(
3090     VADriverContextP   ctx,
3091     struct i965_post_processing_context *pp_context,
3092     const struct i965_surface *src_surface,
3093     const VARectangle *src_rect,
3094     struct i965_surface *dst_surface,
3095     const VARectangle *dst_rect,
3096     int                pp_index,
3097     void *filter_param
3098 )
3099 {
3100     VAStatus va_status;
3101     struct i965_driver_data *i965 = i965_driver_data(ctx);
3102     struct pp_module *pp_module;
3103     dri_bo *bo;
3104     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3105     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3106
3107     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3108     bo = dri_bo_alloc(i965->intel.bufmgr,
3109                       "surface state & binding table",
3110                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3111                       4096);
3112     assert(bo);
3113     pp_context->surface_state_binding_table.bo = bo;
3114
3115     dri_bo_unreference(pp_context->curbe.bo);
3116     bo = dri_bo_alloc(i965->intel.bufmgr,
3117                       "constant buffer",
3118                       4096, 
3119                       4096);
3120     assert(bo);
3121     pp_context->curbe.bo = bo;
3122
3123     dri_bo_unreference(pp_context->idrt.bo);
3124     bo = dri_bo_alloc(i965->intel.bufmgr, 
3125                       "interface discriptor", 
3126                       sizeof(struct gen6_interface_descriptor_data), 
3127                       4096);
3128     assert(bo);
3129     pp_context->idrt.bo = bo;
3130     pp_context->idrt.num_interface_descriptors = 0;
3131
3132     dri_bo_unreference(pp_context->sampler_state_table.bo);
3133     bo = dri_bo_alloc(i965->intel.bufmgr, 
3134                       "sampler state table", 
3135                       4096,
3136                       4096);
3137     assert(bo);
3138     dri_bo_map(bo, True);
3139     memset(bo->virtual, 0, bo->size);
3140     dri_bo_unmap(bo);
3141     pp_context->sampler_state_table.bo = bo;
3142
3143     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3144     bo = dri_bo_alloc(i965->intel.bufmgr, 
3145                       "sampler 8x8 state ",
3146                       4096,
3147                       4096);
3148     assert(bo);
3149     pp_context->sampler_state_table.bo_8x8 = bo;
3150
3151     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3152     bo = dri_bo_alloc(i965->intel.bufmgr, 
3153                       "sampler 8x8 state ",
3154                       4096,
3155                       4096);
3156     assert(bo);
3157     pp_context->sampler_state_table.bo_8x8_uv = bo;
3158
3159     dri_bo_unreference(pp_context->vfe_state.bo);
3160     bo = dri_bo_alloc(i965->intel.bufmgr, 
3161                       "vfe state", 
3162                       sizeof(struct i965_vfe_state), 
3163                       4096);
3164     assert(bo);
3165     pp_context->vfe_state.bo = bo;
3166     
3167     memset(pp_static_parameter, 0, sizeof(*pp_static_parameter));
3168     memset(pp_inline_parameter, 0, sizeof(*pp_inline_parameter));
3169     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3170     pp_context->current_pp = pp_index;
3171     pp_module = &pp_context->pp_modules[pp_index];
3172     
3173     if (pp_module->initialize)
3174         va_status = pp_module->initialize(ctx, pp_context,
3175                                           src_surface,
3176                                           src_rect,
3177                                           dst_surface,
3178                                           dst_rect,
3179                                           filter_param);
3180     else
3181         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3182
3183     return va_status;
3184 }
3185
3186 static void
3187 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
3188                                    struct i965_post_processing_context *pp_context)
3189 {
3190     struct i965_driver_data *i965 = i965_driver_data(ctx);
3191     struct gen6_interface_descriptor_data *desc;
3192     dri_bo *bo;
3193     int pp_index = pp_context->current_pp;
3194
3195     bo = pp_context->idrt.bo;
3196     dri_bo_map(bo, True);
3197     assert(bo->virtual);
3198     desc = bo->virtual;
3199     memset(desc, 0, sizeof(*desc));
3200     desc->desc0.kernel_start_pointer = 
3201         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
3202     desc->desc1.single_program_flow = 1;
3203     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
3204     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
3205     desc->desc2.sampler_state_pointer = 
3206         pp_context->sampler_state_table.bo->offset >> 5;
3207     desc->desc3.binding_table_entry_count = 0;
3208     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
3209     desc->desc4.constant_urb_entry_read_offset = 0;
3210
3211     if (IS_GEN7(i965->intel.device_id))
3212         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
3213     else
3214         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
3215
3216     dri_bo_emit_reloc(bo,
3217                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3218                       0,
3219                       offsetof(struct gen6_interface_descriptor_data, desc0),
3220                       pp_context->pp_modules[pp_index].kernel.bo);
3221
3222     dri_bo_emit_reloc(bo,
3223                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3224                       desc->desc2.sampler_count << 2,
3225                       offsetof(struct gen6_interface_descriptor_data, desc2),
3226                       pp_context->sampler_state_table.bo);
3227
3228     dri_bo_unmap(bo);
3229     pp_context->idrt.num_interface_descriptors++;
3230 }
3231
3232 static void
3233 gen6_pp_upload_constants(VADriverContextP ctx,
3234                          struct i965_post_processing_context *pp_context)
3235 {
3236     struct i965_driver_data *i965 = i965_driver_data(ctx);
3237     unsigned char *constant_buffer;
3238     int param_size;
3239
3240     assert(sizeof(struct pp_static_parameter) == 128);
3241     assert(sizeof(struct gen7_pp_static_parameter) == 192);
3242
3243     if (IS_GEN7(i965->intel.device_id))
3244         param_size = sizeof(struct gen7_pp_static_parameter);
3245     else
3246         param_size = sizeof(struct pp_static_parameter);
3247
3248     dri_bo_map(pp_context->curbe.bo, 1);
3249     assert(pp_context->curbe.bo->virtual);
3250     constant_buffer = pp_context->curbe.bo->virtual;
3251     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
3252     dri_bo_unmap(pp_context->curbe.bo);
3253 }
3254
3255 static void
3256 gen6_pp_states_setup(VADriverContextP ctx,
3257                      struct i965_post_processing_context *pp_context)
3258 {
3259     gen6_pp_interface_descriptor_table(ctx, pp_context);
3260     gen6_pp_upload_constants(ctx, pp_context);
3261 }
3262
3263 static void
3264 gen6_pp_pipeline_select(VADriverContextP ctx,
3265                         struct i965_post_processing_context *pp_context)
3266 {
3267     struct intel_batchbuffer *batch = pp_context->batch;
3268
3269     BEGIN_BATCH(batch, 1);
3270     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
3271     ADVANCE_BATCH(batch);
3272 }
3273
3274 static void
3275 gen6_pp_state_base_address(VADriverContextP ctx,
3276                            struct i965_post_processing_context *pp_context)
3277 {
3278     struct intel_batchbuffer *batch = pp_context->batch;
3279
3280     BEGIN_BATCH(batch, 10);
3281     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
3282     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3283     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
3284     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3285     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3286     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3287     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3288     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3289     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3290     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3291     ADVANCE_BATCH(batch);
3292 }
3293
3294 static void
3295 gen6_pp_vfe_state(VADriverContextP ctx,
3296                   struct i965_post_processing_context *pp_context)
3297 {
3298     struct intel_batchbuffer *batch = pp_context->batch;
3299
3300     BEGIN_BATCH(batch, 8);
3301     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
3302     OUT_BATCH(batch, 0);
3303     OUT_BATCH(batch,
3304               (pp_context->urb.num_vfe_entries - 1) << 16 |
3305               pp_context->urb.num_vfe_entries << 8);
3306     OUT_BATCH(batch, 0);
3307     OUT_BATCH(batch,
3308               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
3309               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
3310     OUT_BATCH(batch, 0);
3311     OUT_BATCH(batch, 0);
3312     OUT_BATCH(batch, 0);
3313     ADVANCE_BATCH(batch);
3314 }
3315
3316 static void
3317 gen6_pp_curbe_load(VADriverContextP ctx,
3318                    struct i965_post_processing_context *pp_context)
3319 {
3320     struct intel_batchbuffer *batch = pp_context->batch;
3321
3322     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
3323
3324     BEGIN_BATCH(batch, 4);
3325     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
3326     OUT_BATCH(batch, 0);
3327     OUT_BATCH(batch,
3328               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
3329     OUT_RELOC(batch, 
3330               pp_context->curbe.bo,
3331               I915_GEM_DOMAIN_INSTRUCTION, 0,
3332               0);
3333     ADVANCE_BATCH(batch);
3334 }
3335
3336 static void
3337 gen6_interface_descriptor_load(VADriverContextP ctx,
3338                                struct i965_post_processing_context *pp_context)
3339 {
3340     struct intel_batchbuffer *batch = pp_context->batch;
3341
3342     BEGIN_BATCH(batch, 4);
3343     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
3344     OUT_BATCH(batch, 0);
3345     OUT_BATCH(batch,
3346               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
3347     OUT_RELOC(batch, 
3348               pp_context->idrt.bo,
3349               I915_GEM_DOMAIN_INSTRUCTION, 0,
3350               0);
3351     ADVANCE_BATCH(batch);
3352 }
3353
3354 static void
3355 gen6_pp_object_walker(VADriverContextP ctx,
3356                       struct i965_post_processing_context *pp_context)
3357 {
3358     struct i965_driver_data *i965 = i965_driver_data(ctx);
3359     struct intel_batchbuffer *batch = pp_context->batch;
3360     int x, x_steps, y, y_steps;
3361     int param_size, command_length_in_dws;
3362     dri_bo *command_buffer;
3363     unsigned int *command_ptr;
3364
3365     if (IS_GEN7(i965->intel.device_id))
3366         param_size = sizeof(struct gen7_pp_inline_parameter);
3367     else
3368         param_size = sizeof(struct pp_inline_parameter);
3369
3370     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
3371     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
3372     command_length_in_dws = 6 + (param_size >> 2);
3373     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
3374                                   "command objects buffer",
3375                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
3376                                   4096);
3377
3378     dri_bo_map(command_buffer, 1);
3379     command_ptr = command_buffer->virtual;
3380
3381     for (y = 0; y < y_steps; y++) {
3382         for (x = 0; x < x_steps; x++) {
3383             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
3384                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
3385                 *command_ptr++ = 0;
3386                 *command_ptr++ = 0;
3387                 *command_ptr++ = 0;
3388                 *command_ptr++ = 0;
3389                 *command_ptr++ = 0;
3390                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
3391                 command_ptr += (param_size >> 2);
3392             }
3393         }
3394     }
3395
3396     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
3397         *command_ptr++ = 0;
3398
3399     *command_ptr = MI_BATCH_BUFFER_END;
3400
3401     dri_bo_unmap(command_buffer);
3402
3403     BEGIN_BATCH(batch, 2);
3404     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
3405     OUT_RELOC(batch, command_buffer, 
3406               I915_GEM_DOMAIN_COMMAND, 0, 
3407               0);
3408     ADVANCE_BATCH(batch);
3409     
3410     dri_bo_unreference(command_buffer);
3411
3412     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
3413      * will cause control to pass back to ring buffer 
3414      */
3415     intel_batchbuffer_end_atomic(batch);
3416     intel_batchbuffer_flush(batch);
3417     intel_batchbuffer_start_atomic(batch, 0x1000);
3418 }
3419
3420 static void
3421 gen6_pp_pipeline_setup(VADriverContextP ctx,
3422                        struct i965_post_processing_context *pp_context)
3423 {
3424     struct intel_batchbuffer *batch = pp_context->batch;
3425
3426     intel_batchbuffer_start_atomic(batch, 0x1000);
3427     intel_batchbuffer_emit_mi_flush(batch);
3428     gen6_pp_pipeline_select(ctx, pp_context);
3429     gen6_pp_state_base_address(ctx, pp_context);
3430     gen6_pp_vfe_state(ctx, pp_context);
3431     gen6_pp_curbe_load(ctx, pp_context);
3432     gen6_interface_descriptor_load(ctx, pp_context);
3433     gen6_pp_object_walker(ctx, pp_context);
3434     intel_batchbuffer_end_atomic(batch);
3435 }
3436
3437 static VAStatus
3438 gen6_post_processing(
3439     VADriverContextP   ctx,
3440     struct i965_post_processing_context *pp_context,
3441     const struct i965_surface *src_surface,
3442     const VARectangle *src_rect,
3443     struct i965_surface *dst_surface,
3444     const VARectangle *dst_rect,
3445     int                pp_index,
3446     void * filter_param
3447 )
3448 {
3449     VAStatus va_status;
3450     
3451     va_status = gen6_pp_initialize(ctx, pp_context,
3452                                    src_surface,
3453                                    src_rect,
3454                                    dst_surface,
3455                                    dst_rect,
3456                                    pp_index,
3457                                    filter_param);
3458
3459     if (va_status == VA_STATUS_SUCCESS) {
3460         gen6_pp_states_setup(ctx, pp_context);
3461         gen6_pp_pipeline_setup(ctx, pp_context);
3462     }
3463
3464     return va_status;
3465 }
3466
3467 static VAStatus
3468 i965_post_processing_internal(
3469     VADriverContextP   ctx,
3470     struct i965_post_processing_context *pp_context,
3471     const struct i965_surface *src_surface,
3472     const VARectangle *src_rect,
3473     struct i965_surface *dst_surface,
3474     const VARectangle *dst_rect,
3475     int                pp_index,
3476     void *filter_param
3477 )
3478 {
3479     struct i965_driver_data *i965 = i965_driver_data(ctx);
3480     VAStatus va_status;
3481
3482     if (IS_GEN6(i965->intel.device_id) ||
3483         IS_GEN7(i965->intel.device_id))
3484         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3485     else
3486         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3487     
3488     return va_status;
3489 }
3490
3491 VAStatus 
3492 i965_DestroySurfaces(VADriverContextP ctx,
3493                      VASurfaceID *surface_list,
3494                      int num_surfaces);
3495 VAStatus 
3496 i965_CreateSurfaces(VADriverContextP ctx,
3497                     int width,
3498                     int height,
3499                     int format,
3500                     int num_surfaces,
3501                     VASurfaceID *surfaces);
3502
/*
 * Split a packed 32-bit ARGB value (alpha in bits 31-24, red in 23-16,
 * green in 15-8, blue in 7-0) and convert the colour to Y/U/V using integer
 * arithmetic with coefficients scaled by 1000.
 *
 * y, u, v and a each receive one 8-bit result; alpha is passed through
 * unchanged.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (int)((argb >> 16) & 0xff);
    const int green = (int)((argb >> 8) & 0xff);
    const int blue  = (int)(argb & 0xff);

    /* Weighted sums, still scaled by 1000. */
    const int luma_x1000 = 257 * red + 504 * green + 98 * blue;
    const int cr_x1000   = 439 * red - 368 * green - 71 * blue;
    const int cb_x1000   = -148 * red - 291 * green + 439 * blue;

    /* Scale back down, then add the offset of each component. */
    *y = (unsigned char)(luma_x1000 / 1000 + 16);
    *v = (unsigned char)(cr_x1000 / 1000 + 128);
    *u = (unsigned char)(cb_x1000 / 1000 + 128);
    *a = (unsigned char)((argb >> 24) & 0xff);
}
3519
3520 static void 
3521 i965_vpp_clear_surface(VADriverContextP ctx,
3522                        struct i965_post_processing_context *pp_context,
3523                        VASurfaceID surface,
3524                        unsigned int color)
3525 {
3526     struct i965_driver_data *i965 = i965_driver_data(ctx);
3527     struct intel_batchbuffer *batch = pp_context->batch;
3528     struct object_surface *obj_surface = SURFACE(surface);
3529     unsigned int blt_cmd, br13;
3530     unsigned int tiling = 0, swizzle = 0;
3531     int pitch;
3532     unsigned char y, u, v, a;
3533
3534     /* Currently only support NV12 surface */
3535     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
3536         return;
3537
3538     rgb_to_yuv(color, &y, &u, &v, &a);
3539
3540     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
3541     blt_cmd = XY_COLOR_BLT_CMD;
3542     pitch = obj_surface->width;
3543
3544     if (tiling != I915_TILING_NONE) {
3545         blt_cmd |= XY_COLOR_BLT_DST_TILED;
3546         pitch >>= 2;
3547     }
3548
3549     br13 = 0xf0 << 16;
3550     br13 |= BR13_8;
3551     br13 |= pitch;
3552
3553     if (IS_GEN6(i965->intel.device_id) ||
3554         IS_GEN7(i965->intel.device_id)) {
3555         intel_batchbuffer_start_atomic_blt(batch, 48);
3556         BEGIN_BLT_BATCH(batch, 12);
3557     } else {
3558         intel_batchbuffer_start_atomic(batch, 48);
3559         BEGIN_BATCH(batch, 12);
3560     }
3561
3562     OUT_BATCH(batch, blt_cmd);
3563     OUT_BATCH(batch, br13);
3564     OUT_BATCH(batch,
3565               0 << 16 |
3566               0);
3567     OUT_BATCH(batch,
3568               obj_surface->height << 16 |
3569               obj_surface->width);
3570     OUT_RELOC(batch, obj_surface->bo, 
3571               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3572               0);
3573     OUT_BATCH(batch, y);
3574
3575     br13 = 0xf0 << 16;
3576     br13 |= BR13_565;
3577     br13 |= pitch;
3578
3579     OUT_BATCH(batch, blt_cmd);
3580     OUT_BATCH(batch, br13);
3581     OUT_BATCH(batch,
3582               0 << 16 |
3583               0);
3584     OUT_BATCH(batch,
3585               obj_surface->height / 2 << 16 |
3586               obj_surface->width / 2);
3587     OUT_RELOC(batch, obj_surface->bo, 
3588               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3589               obj_surface->width * obj_surface->y_cb_offset);
3590     OUT_BATCH(batch, v << 8 | u);
3591
3592     ADVANCE_BATCH(batch);
3593     intel_batchbuffer_end_atomic(batch);
3594 }
3595
3596 VASurfaceID
3597 i965_post_processing(
3598     VADriverContextP   ctx,
3599     VASurfaceID        surface,
3600     const VARectangle *src_rect,
3601     const VARectangle *dst_rect,
3602     unsigned int       flags,
3603     int               *has_done_scaling  
3604 )
3605 {
3606     struct i965_driver_data *i965 = i965_driver_data(ctx);
3607     VASurfaceID in_surface_id = surface;
3608     VASurfaceID out_surface_id = VA_INVALID_ID;
3609     
3610     *has_done_scaling = 0;
3611
3612     if (HAS_PP(i965)) {
3613         struct object_surface *obj_surface;
3614         VAStatus status;
3615         struct i965_surface src_surface;
3616         struct i965_surface dst_surface;
3617
3618         obj_surface = SURFACE(in_surface_id);
3619
3620         /* Currently only support post processing for NV12 surface */
3621         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
3622             return out_surface_id;
3623
3624         _i965LockMutex(&i965->pp_mutex);
3625
3626         if (flags & I965_PP_FLAG_MCDI) {
3627             status = i965_CreateSurfaces(ctx,
3628                                          obj_surface->orig_width,
3629                                          obj_surface->orig_height,
3630                                          VA_RT_FORMAT_YUV420,
3631                                          1,
3632                                          &out_surface_id);
3633             assert(status == VA_STATUS_SUCCESS);
3634             obj_surface = SURFACE(out_surface_id);
3635             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
3636             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
3637             src_surface.id = in_surface_id;
3638             src_surface.type = I965_SURFACE_TYPE_SURFACE;
3639             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
3640                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
3641             dst_surface.id = out_surface_id;
3642             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3643             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3644
3645             i965_post_processing_internal(ctx, i965->pp_context,
3646                                           &src_surface,
3647                                           src_rect,
3648                                           &dst_surface,
3649                                           dst_rect,
3650                                           PP_NV12_DNDI,
3651                                           NULL);
3652         }
3653
3654         if (flags & I965_PP_FLAG_AVS) {
3655             struct i965_render_state *render_state = &i965->render_state;
3656             struct intel_region *dest_region = render_state->draw_region;
3657
3658             if (out_surface_id != VA_INVALID_ID)
3659                 in_surface_id = out_surface_id;
3660
3661             status = i965_CreateSurfaces(ctx,
3662                                          dest_region->width,
3663                                          dest_region->height,
3664                                          VA_RT_FORMAT_YUV420,
3665                                          1,
3666                                          &out_surface_id);
3667             assert(status == VA_STATUS_SUCCESS);
3668             obj_surface = SURFACE(out_surface_id);
3669             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
3670             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
3671             src_surface.id = in_surface_id;
3672             src_surface.type = I965_SURFACE_TYPE_SURFACE;
3673             src_surface.flags = I965_SURFACE_FLAG_FRAME;
3674             dst_surface.id = out_surface_id;
3675             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3676             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3677
3678             i965_post_processing_internal(ctx, i965->pp_context,
3679                                           &src_surface,
3680                                           src_rect,
3681                                           &dst_surface,
3682                                           dst_rect,
3683                                           PP_NV12_AVS,
3684                                           NULL);
3685
3686             if (in_surface_id != surface)
3687                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
3688                 
3689             *has_done_scaling = 1;
3690         }
3691
3692         _i965UnlockMutex(&i965->pp_mutex);
3693     }
3694
3695     return out_surface_id;
3696 }       
3697
3698 static VAStatus
3699 i965_image_pl3_processing(VADriverContextP ctx,
3700                           const struct i965_surface *src_surface,
3701                           const VARectangle *src_rect,
3702                           struct i965_surface *dst_surface,
3703                           const VARectangle *dst_rect)
3704 {
3705     struct i965_driver_data *i965 = i965_driver_data(ctx);
3706     struct i965_post_processing_context *pp_context = i965->pp_context;
3707     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
3708
3709     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
3710         i965_post_processing_internal(ctx, i965->pp_context,
3711                                       src_surface,
3712                                       src_rect,
3713                                       dst_surface,
3714                                       dst_rect,
3715                                       PP_PL3_LOAD_SAVE_N12,
3716                                       NULL);
3717     } else {
3718         i965_post_processing_internal(ctx, i965->pp_context,
3719                                       src_surface,
3720                                       src_rect,
3721                                       dst_surface,
3722                                       dst_rect,
3723                                       PP_PL3_LOAD_SAVE_PL3,
3724                                       NULL);
3725     }
3726
3727     intel_batchbuffer_flush(pp_context->batch);
3728
3729     return VA_STATUS_SUCCESS;
3730 }
3731
3732 static VAStatus
3733 i965_image_pl2_processing(VADriverContextP ctx,
3734                           const struct i965_surface *src_surface,
3735                           const VARectangle *src_rect,
3736                           struct i965_surface *dst_surface,
3737                           const VARectangle *dst_rect)
3738 {
3739     struct i965_driver_data *i965 = i965_driver_data(ctx);
3740     struct i965_post_processing_context *pp_context = i965->pp_context;
3741     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
3742
3743     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
3744         i965_post_processing_internal(ctx, i965->pp_context,
3745                                       src_surface,
3746                                       src_rect,
3747                                       dst_surface,
3748                                       dst_rect,
3749                                       PP_NV12_LOAD_SAVE_N12,
3750                                       NULL);
3751     } else {
3752         i965_post_processing_internal(ctx, i965->pp_context,
3753                                       src_surface,
3754                                       src_rect,
3755                                       dst_surface,
3756                                       dst_rect,
3757                                       PP_NV12_LOAD_SAVE_PL3,
3758                                       NULL);
3759     }
3760
3761     intel_batchbuffer_flush(pp_context->batch);
3762
3763     return VA_STATUS_SUCCESS;
3764 }
3765
3766 VAStatus
3767 i965_image_processing(VADriverContextP ctx,
3768                       const struct i965_surface *src_surface,
3769                       const VARectangle *src_rect,
3770                       struct i965_surface *dst_surface,
3771                       const VARectangle *dst_rect)
3772 {
3773     struct i965_driver_data *i965 = i965_driver_data(ctx);
3774     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
3775
3776     if (HAS_PP(i965)) {
3777         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
3778
3779         _i965LockMutex(&i965->pp_mutex);
3780
3781         switch (fourcc) {
3782         case VA_FOURCC('Y', 'V', '1', '2'):
3783         case VA_FOURCC('I', '4', '2', '0'):
3784         case VA_FOURCC('I', 'M', 'C', '1'):
3785         case VA_FOURCC('I', 'M', 'C', '3'):
3786             status = i965_image_pl3_processing(ctx,
3787                                                src_surface,
3788                                                src_rect,
3789                                                dst_surface,
3790                                                dst_rect);
3791             break;
3792
3793         case  VA_FOURCC('N', 'V', '1', '2'):
3794             status = i965_image_pl2_processing(ctx,
3795                                                src_surface,
3796                                                src_rect,
3797                                                dst_surface,
3798                                                dst_rect);
3799             break;
3800
3801         default:
3802             status = VA_STATUS_ERROR_UNIMPLEMENTED;
3803             break;
3804         }
3805         
3806         _i965UnlockMutex(&i965->pp_mutex);
3807     }
3808
3809     return status;
3810 }       
3811
3812 static void
3813 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
3814 {
3815     int i;
3816
3817     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3818     pp_context->surface_state_binding_table.bo = NULL;
3819
3820     dri_bo_unreference(pp_context->curbe.bo);
3821     pp_context->curbe.bo = NULL;
3822
3823     dri_bo_unreference(pp_context->sampler_state_table.bo);
3824     pp_context->sampler_state_table.bo = NULL;
3825
3826     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3827     pp_context->sampler_state_table.bo_8x8 = NULL;
3828
3829     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3830     pp_context->sampler_state_table.bo_8x8_uv = NULL;
3831
3832     dri_bo_unreference(pp_context->idrt.bo);
3833     pp_context->idrt.bo = NULL;
3834     pp_context->idrt.num_interface_descriptors = 0;
3835
3836     dri_bo_unreference(pp_context->vfe_state.bo);
3837     pp_context->vfe_state.bo = NULL;
3838
3839     dri_bo_unreference(pp_context->stmm.bo);
3840     pp_context->stmm.bo = NULL;
3841
3842     for (i = 0; i < NUM_PP_MODULES; i++) {
3843         struct pp_module *pp_module = &pp_context->pp_modules[i];
3844
3845         dri_bo_unreference(pp_module->kernel.bo);
3846         pp_module->kernel.bo = NULL;
3847     }
3848
3849     free(pp_context->pp_static_parameter);
3850     free(pp_context->pp_inline_parameter);
3851     pp_context->pp_static_parameter = NULL;
3852     pp_context->pp_inline_parameter = NULL;
3853 }
3854
3855 Bool
3856 i965_post_processing_terminate(VADriverContextP ctx)
3857 {
3858     struct i965_driver_data *i965 = i965_driver_data(ctx);
3859     struct i965_post_processing_context *pp_context = i965->pp_context;
3860
3861     if (pp_context) {
3862         i965_post_processing_context_finalize(pp_context);
3863         free(pp_context);
3864     }
3865
3866     i965->pp_context = NULL;
3867
3868     return True;
3869 }
3870
3871 static void
3872 i965_post_processing_context_init(VADriverContextP ctx,
3873                                   struct i965_post_processing_context *pp_context,
3874                                   struct intel_batchbuffer *batch)
3875 {
3876     struct i965_driver_data *i965 = i965_driver_data(ctx);
3877     int i;
3878
3879     pp_context->urb.size = URB_SIZE((&i965->intel));
3880     pp_context->urb.num_vfe_entries = 32;
3881     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
3882     pp_context->urb.num_cs_entries = 1;
3883     
3884     if (IS_GEN7(i965->intel.device_id))
3885         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
3886     else
3887         pp_context->urb.size_cs_entry = 2;
3888
3889     pp_context->urb.vfe_start = 0;
3890     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
3891         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
3892     assert(pp_context->urb.cs_start + 
3893            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
3894
3895     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
3896     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
3897     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
3898
3899     if (IS_GEN7(i965->intel.device_id))
3900         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
3901     else if (IS_GEN6(i965->intel.device_id))
3902         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
3903     else if (IS_IRONLAKE(i965->intel.device_id))
3904         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
3905
3906     for (i = 0; i < NUM_PP_MODULES; i++) {
3907         struct pp_module *pp_module = &pp_context->pp_modules[i];
3908         dri_bo_unreference(pp_module->kernel.bo);
3909         if (pp_module->kernel.bin && pp_module->kernel.size) {
3910             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
3911                                                 pp_module->kernel.name,
3912                                                 pp_module->kernel.size,
3913                                                 4096);
3914             assert(pp_module->kernel.bo);
3915             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
3916         } else {
3917             pp_module->kernel.bo = NULL;
3918         }
3919     }
3920
3921     /* static & inline parameters */
3922     if (IS_GEN7(i965->intel.device_id)) {
3923         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
3924         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
3925     } else {
3926         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
3927         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
3928     }
3929
3930     pp_context->batch = batch;
3931 }
3932
3933 Bool
3934 i965_post_processing_init(VADriverContextP ctx)
3935 {
3936     struct i965_driver_data *i965 = i965_driver_data(ctx);
3937     struct i965_post_processing_context *pp_context = i965->pp_context;
3938
3939     if (HAS_PP(i965)) {
3940         if (pp_context == NULL) {
3941             pp_context = calloc(1, sizeof(*pp_context));
3942             i965_post_processing_context_init(ctx, pp_context, i965->batch);
3943             i965->pp_context = pp_context;
3944         }
3945     }
3946
3947     return True;
3948 }
3949
3950 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
3951     PP_NULL,    /* VAProcFilterNone */
3952     PP_NV12_DN, /* VAProcFilterNoiseReduction */
3953     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
3954     PP_NULL,    /* VAProcFilterSharpening */
3955     PP_NULL,    /* VAProcFilterColorBalance */
3956     PP_NULL,    /* VAProcFilterColorStandard */
3957 };
3958
3959 static const int proc_frame_to_pp_frame[3] = {
3960     I965_SURFACE_FLAG_FRAME,
3961     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
3962     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
3963 };
3964
3965 static void 
3966 i965_proc_picture(VADriverContextP ctx, 
3967                   VAProfile profile, 
3968                   union codec_state *codec_state,
3969                   struct hw_context *hw_context)
3970 {
3971     struct i965_driver_data *i965 = i965_driver_data(ctx);
3972     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
3973     struct proc_state *proc_state = &codec_state->proc;
3974     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
3975     struct object_surface *obj_surface;
3976     struct i965_surface src_surface, dst_surface;
3977     VARectangle src_rect, dst_rect;
3978     VAStatus status;
3979     int i;
3980     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
3981     int num_tmp_surfaces = 0;
3982     unsigned int tiling = 0, swizzle = 0;
3983     int in_width, in_height;
3984
3985     assert(pipeline_param->surface != VA_INVALID_ID);
3986     assert(proc_state->current_render_target != VA_INVALID_ID);
3987
3988     obj_surface = SURFACE(pipeline_param->surface);
3989     in_width = obj_surface->orig_width;
3990     in_height = obj_surface->orig_height;
3991     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
3992
3993     src_surface.id = pipeline_param->surface;
3994     src_surface.type = I965_SURFACE_TYPE_SURFACE;
3995     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
3996
3997     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
3998         VASurfaceID out_surface_id = VA_INVALID_ID;
3999
4000         src_surface.id = pipeline_param->surface;
4001         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4002         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4003         src_rect.x = 0;
4004         src_rect.y = 0;
4005         src_rect.width = in_width;
4006         src_rect.height = in_height;
4007
4008         status = i965_CreateSurfaces(ctx,
4009                                      in_width,
4010                                      in_height,
4011                                      VA_RT_FORMAT_YUV420,
4012                                      1,
4013                                      &out_surface_id);
4014         assert(status == VA_STATUS_SUCCESS);
4015         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4016         obj_surface = SURFACE(out_surface_id);
4017         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
4018
4019         dst_surface.id = out_surface_id;
4020         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4021         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4022         dst_rect.x = 0;
4023         dst_rect.y = 0;
4024         dst_rect.width = in_width;
4025         dst_rect.height = in_height;
4026
4027         status = i965_image_processing(ctx,
4028                                        &src_surface,
4029                                        &src_rect,
4030                                        &dst_surface,
4031                                        &dst_rect);
4032         assert(status == VA_STATUS_SUCCESS);
4033
4034         src_surface.id = out_surface_id;
4035         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4036         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4037     }
4038
4039     if (pipeline_param->surface_region) {
4040         src_rect.x = pipeline_param->surface_region->x;
4041         src_rect.y = pipeline_param->surface_region->y;
4042         src_rect.width = pipeline_param->surface_region->width;
4043         src_rect.height = pipeline_param->surface_region->height;
4044     } else {
4045         src_rect.x = 0;
4046         src_rect.y = 0;
4047         src_rect.width = in_width;
4048         src_rect.height = in_height;
4049     }
4050
4051     if (pipeline_param->output_region) {
4052         dst_rect.x = pipeline_param->output_region->x;
4053         dst_rect.y = pipeline_param->output_region->y;
4054         dst_rect.width = pipeline_param->output_region->width;
4055         dst_rect.height = pipeline_param->output_region->height;
4056     } else {
4057         dst_rect.x = 0;
4058         dst_rect.y = 0;
4059         dst_rect.width = in_width;
4060         dst_rect.height = in_height;
4061     }
4062
4063     obj_surface = SURFACE(proc_state->current_render_target);
4064     i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4065     i965_vpp_clear_surface(ctx, i965->pp_context, proc_state->current_render_target, pipeline_param->output_background_color); 
4066     
4067     for (i = 0; i < pipeline_param->num_filters; i++) {
4068         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
4069         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
4070         VAProcFilterType filter_type = filter_param->type;
4071         VASurfaceID out_surface_id = VA_INVALID_ID;
4072         int kernel_index = procfilter_to_pp_flag[filter_type];
4073
4074         if (kernel_index != PP_NULL &&
4075             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
4076             status = i965_CreateSurfaces(ctx,
4077                                          in_width,
4078                                          in_height,
4079                                          VA_RT_FORMAT_YUV420,
4080                                          1,
4081                                          &out_surface_id);
4082             assert(status == VA_STATUS_SUCCESS);
4083             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4084             obj_surface = SURFACE(out_surface_id);
4085             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4086             dst_surface.id = out_surface_id;
4087             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4088             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
4089                                                    &src_surface,
4090                                                    &src_rect,
4091                                                    &dst_surface,
4092                                                    &src_rect,
4093                                                    kernel_index,
4094                                                    filter_param);
4095
4096             if (status == VA_STATUS_SUCCESS) {
4097                 src_surface.id = dst_surface.id;
4098                 src_surface.type = dst_surface.type;
4099                 src_surface.flags = dst_surface.flags;
4100             }
4101         }
4102     }
4103
4104     dst_surface.id = proc_state->current_render_target;
4105     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4106
4107     if (src_rect.width == dst_rect.width &&
4108         src_rect.height == dst_rect.height) {
4109         i965_post_processing_internal(ctx, &proc_context->pp_context,
4110                                       &src_surface,
4111                                       &src_rect,
4112                                       &dst_surface,
4113                                       &dst_rect,
4114                                       PP_NV12_LOAD_SAVE_N12,
4115                                       NULL);
4116     } else {
4117
4118         i965_post_processing_internal(ctx, &proc_context->pp_context,
4119                                       &src_surface,
4120                                       &src_rect,
4121                                       &dst_surface,
4122                                       &dst_rect,
4123                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
4124                                       PP_NV12_AVS : PP_NV12_SCALING,
4125                                       NULL);
4126     }
4127
4128     if (num_tmp_surfaces)
4129         i965_DestroySurfaces(ctx,
4130                              tmp_surfaces,
4131                              num_tmp_surfaces);
4132
4133     intel_batchbuffer_flush(hw_context->batch);
4134 }
4135
4136 static void
4137 i965_proc_context_destroy(void *hw_context)
4138 {
4139     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4140
4141     i965_post_processing_context_finalize(&proc_context->pp_context);
4142     intel_batchbuffer_free(proc_context->base.batch);
4143     free(proc_context);
4144 }
4145
4146 struct hw_context *
4147 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
4148 {
4149     struct intel_driver_data *intel = intel_driver_data(ctx);
4150     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
4151
4152     proc_context->base.destroy = i965_proc_context_destroy;
4153     proc_context->base.run = i965_proc_picture;
4154     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
4155     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
4156
4157     return (struct hw_context *)proc_context;
4158 }