render: fix rendering of interlaced surfaces.
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
/* Non-zero when the device behind ctx is Ironlake, Gen6 or Gen7. */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))

/*
 * Every surface-state slot has the same size: the largest of the four
 * surface-state structures used in this file, each rounded up to a
 * multiple of 32 bytes.
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
/*
 * Byte offset of surface-state slot `index`.  The argument is
 * parenthesized so that an expression such as `i + 1` is multiplied as a
 * whole rather than only its first operand.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* The binding table starts right after the last surface-state slot. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
57
58 static const uint32_t pp_null_gen5[][4] = {
59 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
60 };
61
62 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
63 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
64 };
65
66 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
68 };
69
70 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
72 };
73
74 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
76 };
77
78 static const uint32_t pp_nv12_scaling_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
80 };
81
82 static const uint32_t pp_nv12_avs_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_dndi_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_dn_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
92 };
93
94 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
95                                    const struct i965_surface *src_surface,
96                                    const VARectangle *src_rect,
97                                    struct i965_surface *dst_surface,
98                                    const VARectangle *dst_rect,
99                                    void *filter_param);
100 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
101                                             const struct i965_surface *src_surface,
102                                             const VARectangle *src_rect,
103                                             struct i965_surface *dst_surface,
104                                             const VARectangle *dst_rect,
105                                             void *filter_param);
106 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
107                                            const struct i965_surface *src_surface,
108                                            const VARectangle *src_rect,
109                                            struct i965_surface *dst_surface,
110                                            const VARectangle *dst_rect,
111                                            void *filter_param);
112 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
113                                              const struct i965_surface *src_surface,
114                                              const VARectangle *src_rect,
115                                              struct i965_surface *dst_surface,
116                                              const VARectangle *dst_rect,
117                                              void *filter_param);
118 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
119                                                 const struct i965_surface *src_surface,
120                                                 const VARectangle *src_rect,
121                                                 struct i965_surface *dst_surface,
122                                                 const VARectangle *dst_rect,
123                                                 void *filter_param);
124 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
125                                         const struct i965_surface *src_surface,
126                                         const VARectangle *src_rect,
127                                         struct i965_surface *dst_surface,
128                                         const VARectangle *dst_rect,
129                                         void *filter_param);
130 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
131                                       const struct i965_surface *src_surface,
132                                       const VARectangle *src_rect,
133                                       struct i965_surface *dst_surface,
134                                       const VARectangle *dst_rect,
135                                       void *filter_param);
136
137 static struct pp_module pp_modules_gen5[] = {
138     {
139         {
140             "NULL module (for testing)",
141             PP_NULL,
142             pp_null_gen5,
143             sizeof(pp_null_gen5),
144             NULL,
145         },
146
147         pp_null_initialize,
148     },
149
150     {
151         {
152             "NV12_NV12",
153             PP_NV12_LOAD_SAVE_N12,
154             pp_nv12_load_save_nv12_gen5,
155             sizeof(pp_nv12_load_save_nv12_gen5),
156             NULL,
157         },
158
159         pp_plx_load_save_plx_initialize,
160     },
161
162     {
163         {
164             "NV12_PL3",
165             PP_NV12_LOAD_SAVE_PL3,
166             pp_nv12_load_save_pl3_gen5,
167             sizeof(pp_nv12_load_save_pl3_gen5),
168             NULL,
169         },
170
171         pp_plx_load_save_plx_initialize,
172     },
173
174     {
175         {
176             "PL3_NV12",
177             PP_PL3_LOAD_SAVE_N12,
178             pp_pl3_load_save_nv12_gen5,
179             sizeof(pp_pl3_load_save_nv12_gen5),
180             NULL,
181         },
182
183         pp_plx_load_save_plx_initialize,
184     },
185
186     {
187         {
188             "PL3_PL3",
189             PP_PL3_LOAD_SAVE_N12,
190             pp_pl3_load_save_pl3_gen5,
191             sizeof(pp_pl3_load_save_pl3_gen5),
192             NULL,
193         },
194
195         pp_plx_load_save_plx_initialize
196     },
197
198     {
199         {
200             "NV12 Scaling module",
201             PP_NV12_SCALING,
202             pp_nv12_scaling_gen5,
203             sizeof(pp_nv12_scaling_gen5),
204             NULL,
205         },
206
207         pp_nv12_scaling_initialize,
208     },
209
210     {
211         {
212             "NV12 AVS module",
213             PP_NV12_AVS,
214             pp_nv12_avs_gen5,
215             sizeof(pp_nv12_avs_gen5),
216             NULL,
217         },
218
219         pp_nv12_avs_initialize_nlas,
220     },
221
222     {
223         {
224             "NV12 DNDI module",
225             PP_NV12_DNDI,
226             pp_nv12_dndi_gen5,
227             sizeof(pp_nv12_dndi_gen5),
228             NULL,
229         },
230
231         pp_nv12_dndi_initialize,
232     },
233
234     {
235         {
236             "NV12 DN module",
237             PP_NV12_DN,
238             pp_nv12_dn_gen5,
239             sizeof(pp_nv12_dn_gen5),
240             NULL,
241         },
242
243         pp_nv12_dn_initialize,
244     },
245 };
246
247 static const uint32_t pp_null_gen6[][4] = {
248 #include "shaders/post_processing/gen5_6/null.g6b"
249 };
250
251 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
252 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
253 };
254
255 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
256 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
257 };
258
259 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
260 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
261 };
262
263 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
264 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
265 };
266
267 static const uint32_t pp_nv12_scaling_gen6[][4] = {
268 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
269 };
270
271 static const uint32_t pp_nv12_avs_gen6[][4] = {
272 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
273 };
274
275 static const uint32_t pp_nv12_dndi_gen6[][4] = {
276 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
277 };
278
279 static const uint32_t pp_nv12_dn_gen6[][4] = {
280 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
281 };
282
283 static struct pp_module pp_modules_gen6[] = {
284     {
285         {
286             "NULL module (for testing)",
287             PP_NULL,
288             pp_null_gen6,
289             sizeof(pp_null_gen6),
290             NULL,
291         },
292
293         pp_null_initialize,
294     },
295
296     {
297         {
298             "NV12_NV12",
299             PP_NV12_LOAD_SAVE_N12,
300             pp_nv12_load_save_nv12_gen6,
301             sizeof(pp_nv12_load_save_nv12_gen6),
302             NULL,
303         },
304
305         pp_plx_load_save_plx_initialize,
306     },
307
308     {
309         {
310             "NV12_PL3",
311             PP_NV12_LOAD_SAVE_PL3,
312             pp_nv12_load_save_pl3_gen6,
313             sizeof(pp_nv12_load_save_pl3_gen6),
314             NULL,
315         },
316         
317         pp_plx_load_save_plx_initialize,
318     },
319
320     {
321         {
322             "PL3_NV12",
323             PP_PL3_LOAD_SAVE_N12,
324             pp_pl3_load_save_nv12_gen6,
325             sizeof(pp_pl3_load_save_nv12_gen6),
326             NULL,
327         },
328
329         pp_plx_load_save_plx_initialize,
330     },
331
332     {
333         {
334             "PL3_PL3",
335             PP_PL3_LOAD_SAVE_N12,
336             pp_pl3_load_save_pl3_gen6,
337             sizeof(pp_pl3_load_save_pl3_gen6),
338             NULL,
339         },
340
341         pp_plx_load_save_plx_initialize,
342     },
343
344     {
345         {
346             "NV12 Scaling module",
347             PP_NV12_SCALING,
348             pp_nv12_scaling_gen6,
349             sizeof(pp_nv12_scaling_gen6),
350             NULL,
351         },
352
353         gen6_nv12_scaling_initialize,
354     },
355
356     {
357         {
358             "NV12 AVS module",
359             PP_NV12_AVS,
360             pp_nv12_avs_gen6,
361             sizeof(pp_nv12_avs_gen6),
362             NULL,
363         },
364
365         pp_nv12_avs_initialize_nlas,
366     },
367
368     {
369         {
370             "NV12 DNDI module",
371             PP_NV12_DNDI,
372             pp_nv12_dndi_gen6,
373             sizeof(pp_nv12_dndi_gen6),
374             NULL,
375         },
376
377         pp_nv12_dndi_initialize,
378     },
379
380     {
381         {
382             "NV12 DN module",
383             PP_NV12_DN,
384             pp_nv12_dn_gen6,
385             sizeof(pp_nv12_dn_gen6),
386             NULL,
387         },
388
389         pp_nv12_dn_initialize,
390     },
391 };
392
393 static const uint32_t pp_null_gen7[][4] = {
394 };
395
396 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
397 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
398 };
399
400 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
401 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
402 };
403
404 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
405 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
406 };
407
408 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
409 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
410 };
411
412 static const uint32_t pp_nv12_scaling_gen7[][4] = {
413 #include "shaders/post_processing/gen7/avs.g7b"
414 };
415
416 static const uint32_t pp_nv12_avs_gen7[][4] = {
417 #include "shaders/post_processing/gen7/avs.g7b"
418 };
419
420 static const uint32_t pp_nv12_dndi_gen7[][4] = {
421 #include "shaders/post_processing/gen7/dndi.g7b"
422 };
423
424 static const uint32_t pp_nv12_dn_gen7[][4] = {
425 };
426
427 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
428                                            const struct i965_surface *src_surface,
429                                            const VARectangle *src_rect,
430                                            struct i965_surface *dst_surface,
431                                            const VARectangle *dst_rect,
432                                            void *filter_param);
433 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
434                                              const struct i965_surface *src_surface,
435                                              const VARectangle *src_rect,
436                                              struct i965_surface *dst_surface,
437                                              const VARectangle *dst_rect,
438                                              void *filter_param);
439 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
440                                            const struct i965_surface *src_surface,
441                                            const VARectangle *src_rect,
442                                            struct i965_surface *dst_surface,
443                                            const VARectangle *dst_rect,
444                                            void *filter_param);
445
446 static struct pp_module pp_modules_gen7[] = {
447     {
448         {
449             "NULL module (for testing)",
450             PP_NULL,
451             pp_null_gen7,
452             sizeof(pp_null_gen7),
453             NULL,
454         },
455
456         pp_null_initialize,
457     },
458
459     {
460         {
461             "NV12_NV12",
462             PP_NV12_LOAD_SAVE_N12,
463             pp_nv12_load_save_nv12_gen7,
464             sizeof(pp_nv12_load_save_nv12_gen7),
465             NULL,
466         },
467
468         gen7_pp_plx_avs_initialize,
469     },
470
471     {
472         {
473             "NV12_PL3",
474             PP_NV12_LOAD_SAVE_PL3,
475             pp_nv12_load_save_pl3_gen7,
476             sizeof(pp_nv12_load_save_pl3_gen7),
477             NULL,
478         },
479         
480         gen7_pp_plx_avs_initialize,
481     },
482
483     {
484         {
485             "PL3_NV12",
486             PP_PL3_LOAD_SAVE_N12,
487             pp_pl3_load_save_nv12_gen7,
488             sizeof(pp_pl3_load_save_nv12_gen7),
489             NULL,
490         },
491
492         gen7_pp_plx_avs_initialize,
493     },
494
495     {
496         {
497             "PL3_PL3",
498             PP_PL3_LOAD_SAVE_N12,
499             pp_pl3_load_save_pl3_gen7,
500             sizeof(pp_pl3_load_save_pl3_gen7),
501             NULL,
502         },
503
504         gen7_pp_plx_avs_initialize,
505     },
506
507     {
508         {
509             "NV12 Scaling module",
510             PP_NV12_SCALING,
511             pp_nv12_scaling_gen7,
512             sizeof(pp_nv12_scaling_gen7),
513             NULL,
514         },
515
516         gen7_pp_plx_avs_initialize,
517     },
518
519     {
520         {
521             "NV12 AVS module",
522             PP_NV12_AVS,
523             pp_nv12_avs_gen7,
524             sizeof(pp_nv12_avs_gen7),
525             NULL,
526         },
527
528         gen7_pp_plx_avs_initialize,
529     },
530
531     {
532         {
533             "NV12 DNDI module",
534             PP_NV12_DNDI,
535             pp_nv12_dndi_gen7,
536             sizeof(pp_nv12_dndi_gen7),
537             NULL,
538         },
539
540         gen7_pp_nv12_dndi_initialize,
541     },
542
543     {
544         {
545             "NV12 DN module",
546             PP_NV12_DN,
547             pp_nv12_dn_gen7,
548             sizeof(pp_nv12_dn_gen7),
549             NULL,
550         },
551
552         gen7_pp_nv12_dn_initialize,
553     },
554 };
555
556 static int
557 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
558 {
559     struct i965_driver_data *i965 = i965_driver_data(ctx);
560     int fourcc;
561
562     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
563         struct object_image *obj_image = IMAGE(surface->id);
564         fourcc = obj_image->image.format.fourcc;
565     } else {
566         struct object_surface *obj_surface = SURFACE(surface->id);
567         fourcc = obj_surface->fourcc;
568     }
569
570     return fourcc;
571 }
572
573 static void
574 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
575 {
576     switch (tiling) {
577     case I915_TILING_NONE:
578         ss->ss3.tiled_surface = 0;
579         ss->ss3.tile_walk = 0;
580         break;
581     case I915_TILING_X:
582         ss->ss3.tiled_surface = 1;
583         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
584         break;
585     case I915_TILING_Y:
586         ss->ss3.tiled_surface = 1;
587         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
588         break;
589     }
590 }
591
592 static void
593 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
594 {
595     switch (tiling) {
596     case I915_TILING_NONE:
597         ss->ss2.tiled_surface = 0;
598         ss->ss2.tile_walk = 0;
599         break;
600     case I915_TILING_X:
601         ss->ss2.tiled_surface = 1;
602         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
603         break;
604     case I915_TILING_Y:
605         ss->ss2.tiled_surface = 1;
606         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
607         break;
608     }
609 }
610
611 static void
612 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
613 {
614     switch (tiling) {
615     case I915_TILING_NONE:
616         ss->ss0.tiled_surface = 0;
617         ss->ss0.tile_walk = 0;
618         break;
619     case I915_TILING_X:
620         ss->ss0.tiled_surface = 1;
621         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
622         break;
623     case I915_TILING_Y:
624         ss->ss0.tiled_surface = 1;
625         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
626         break;
627     }
628 }
629
630 static void
631 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
632 {
633     switch (tiling) {
634     case I915_TILING_NONE:
635         ss->ss2.tiled_surface = 0;
636         ss->ss2.tile_walk = 0;
637         break;
638     case I915_TILING_X:
639         ss->ss2.tiled_surface = 1;
640         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
641         break;
642     case I915_TILING_Y:
643         ss->ss2.tiled_surface = 1;
644         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
645         break;
646     }
647 }
648
649 static void
650 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
651 {
652     struct i965_interface_descriptor *desc;
653     dri_bo *bo;
654     int pp_index = pp_context->current_pp;
655
656     bo = pp_context->idrt.bo;
657     dri_bo_map(bo, 1);
658     assert(bo->virtual);
659     desc = bo->virtual;
660     memset(desc, 0, sizeof(*desc));
661     desc->desc0.grf_reg_blocks = 10;
662     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
663     desc->desc1.const_urb_entry_read_offset = 0;
664     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
665     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
666     desc->desc2.sampler_count = 0;
667     desc->desc3.binding_table_entry_count = 0;
668     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
669
670     dri_bo_emit_reloc(bo,
671                       I915_GEM_DOMAIN_INSTRUCTION, 0,
672                       desc->desc0.grf_reg_blocks,
673                       offsetof(struct i965_interface_descriptor, desc0),
674                       pp_context->pp_modules[pp_index].kernel.bo);
675
676     dri_bo_emit_reloc(bo,
677                       I915_GEM_DOMAIN_INSTRUCTION, 0,
678                       desc->desc2.sampler_count << 2,
679                       offsetof(struct i965_interface_descriptor, desc2),
680                       pp_context->sampler_state_table.bo);
681
682     dri_bo_unmap(bo);
683     pp_context->idrt.num_interface_descriptors++;
684 }
685
686 static void
687 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
688 {
689     struct i965_vfe_state *vfe_state;
690     dri_bo *bo;
691
692     bo = pp_context->vfe_state.bo;
693     dri_bo_map(bo, 1);
694     assert(bo->virtual);
695     vfe_state = bo->virtual;
696     memset(vfe_state, 0, sizeof(*vfe_state));
697     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
698     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
699     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
700     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
701     vfe_state->vfe1.children_present = 0;
702     vfe_state->vfe2.interface_descriptor_base = 
703         pp_context->idrt.bo->offset >> 4; /* reloc */
704     dri_bo_emit_reloc(bo,
705                       I915_GEM_DOMAIN_INSTRUCTION, 0,
706                       0,
707                       offsetof(struct i965_vfe_state, vfe2),
708                       pp_context->idrt.bo);
709     dri_bo_unmap(bo);
710 }
711
712 static void
713 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
714 {
715     unsigned char *constant_buffer;
716     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
717
718     assert(sizeof(*pp_static_parameter) == 128);
719     dri_bo_map(pp_context->curbe.bo, 1);
720     assert(pp_context->curbe.bo->virtual);
721     constant_buffer = pp_context->curbe.bo->virtual;
722     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
723     dri_bo_unmap(pp_context->curbe.bo);
724 }
725
726 static void
727 ironlake_pp_states_setup(VADriverContextP ctx,
728                          struct i965_post_processing_context *pp_context)
729 {
730     ironlake_pp_interface_descriptor_table(pp_context);
731     ironlake_pp_vfe_state(pp_context);
732     ironlake_pp_upload_constants(pp_context);
733 }
734
735 static void
736 ironlake_pp_pipeline_select(VADriverContextP ctx,
737                             struct i965_post_processing_context *pp_context)
738 {
739     struct intel_batchbuffer *batch = pp_context->batch;
740
741     BEGIN_BATCH(batch, 1);
742     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
743     ADVANCE_BATCH(batch);
744 }
745
746 static void
747 ironlake_pp_urb_layout(VADriverContextP ctx,
748                        struct i965_post_processing_context *pp_context)
749 {
750     struct intel_batchbuffer *batch = pp_context->batch;
751     unsigned int vfe_fence, cs_fence;
752
753     vfe_fence = pp_context->urb.cs_start;
754     cs_fence = pp_context->urb.size;
755
756     BEGIN_BATCH(batch, 3);
757     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
758     OUT_BATCH(batch, 0);
759     OUT_BATCH(batch, 
760               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
761               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
762     ADVANCE_BATCH(batch);
763 }
764
765 static void
766 ironlake_pp_state_base_address(VADriverContextP ctx,
767                                struct i965_post_processing_context *pp_context)
768 {
769     struct intel_batchbuffer *batch = pp_context->batch;
770
771     BEGIN_BATCH(batch, 8);
772     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
773     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
774     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
775     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
776     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
777     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
778     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
779     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
780     ADVANCE_BATCH(batch);
781 }
782
783 static void
784 ironlake_pp_state_pointers(VADriverContextP ctx,
785                            struct i965_post_processing_context *pp_context)
786 {
787     struct intel_batchbuffer *batch = pp_context->batch;
788
789     BEGIN_BATCH(batch, 3);
790     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
791     OUT_BATCH(batch, 0);
792     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
793     ADVANCE_BATCH(batch);
794 }
795
796 static void 
797 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
798                           struct i965_post_processing_context *pp_context)
799 {
800     struct intel_batchbuffer *batch = pp_context->batch;
801
802     BEGIN_BATCH(batch, 2);
803     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
804     OUT_BATCH(batch,
805               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
806               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
807     ADVANCE_BATCH(batch);
808 }
809
810 static void
811 ironlake_pp_constant_buffer(VADriverContextP ctx,
812                             struct i965_post_processing_context *pp_context)
813 {
814     struct intel_batchbuffer *batch = pp_context->batch;
815
816     BEGIN_BATCH(batch, 2);
817     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
818     OUT_RELOC(batch, pp_context->curbe.bo,
819               I915_GEM_DOMAIN_INSTRUCTION, 0,
820               pp_context->urb.size_cs_entry - 1);
821     ADVANCE_BATCH(batch);    
822 }
823
824 static void
825 ironlake_pp_object_walker(VADriverContextP ctx,
826                           struct i965_post_processing_context *pp_context)
827 {
828     struct intel_batchbuffer *batch = pp_context->batch;
829     int x, x_steps, y, y_steps;
830     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
831
832     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
833     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
834
835     for (y = 0; y < y_steps; y++) {
836         for (x = 0; x < x_steps; x++) {
837             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
838                 BEGIN_BATCH(batch, 20);
839                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
840                 OUT_BATCH(batch, 0);
841                 OUT_BATCH(batch, 0); /* no indirect data */
842                 OUT_BATCH(batch, 0);
843
844                 /* inline data grf 5-6 */
845                 assert(sizeof(*pp_inline_parameter) == 64);
846                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
847
848                 ADVANCE_BATCH(batch);
849             }
850         }
851     }
852 }
853
854 static void
855 ironlake_pp_pipeline_setup(VADriverContextP ctx,
856                            struct i965_post_processing_context *pp_context)
857 {
858     struct intel_batchbuffer *batch = pp_context->batch;
859
860     intel_batchbuffer_start_atomic(batch, 0x1000);
861     intel_batchbuffer_emit_mi_flush(batch);
862     ironlake_pp_pipeline_select(ctx, pp_context);
863     ironlake_pp_state_base_address(ctx, pp_context);
864     ironlake_pp_state_pointers(ctx, pp_context);
865     ironlake_pp_urb_layout(ctx, pp_context);
866     ironlake_pp_cs_urb_layout(ctx, pp_context);
867     ironlake_pp_constant_buffer(ctx, pp_context);
868     ironlake_pp_object_walker(ctx, pp_context);
869     intel_batchbuffer_end_atomic(batch);
870 }
871
872 static void
873 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
874                           dri_bo *surf_bo, unsigned long surf_bo_offset,
875                           int width, int height, int pitch, int format, 
876                           int index, int is_target)
877 {
878     struct i965_surface_state *ss;
879     dri_bo *ss_bo;
880     unsigned int tiling;
881     unsigned int swizzle;
882
883     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
884     ss_bo = pp_context->surface_state_binding_table.bo;
885     assert(ss_bo);
886
887     dri_bo_map(ss_bo, True);
888     assert(ss_bo->virtual);
889     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
890     memset(ss, 0, sizeof(*ss));
891     ss->ss0.surface_type = I965_SURFACE_2D;
892     ss->ss0.surface_format = format;
893     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
894     ss->ss2.width = width - 1;
895     ss->ss2.height = height - 1;
896     ss->ss3.pitch = pitch - 1;
897     pp_set_surface_tiling(ss, tiling);
898     dri_bo_emit_reloc(ss_bo,
899                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
900                       surf_bo_offset,
901                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
902                       surf_bo);
903     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
904     dri_bo_unmap(ss_bo);
905 }
906
907 static void
908 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
909                            dri_bo *surf_bo, unsigned long surf_bo_offset,
910                            int width, int height, int wpitch,
911                            int xoffset, int yoffset,
912                            int format, int interleave_chroma,
913                            int index)
914 {
915     struct i965_surface_state2 *ss2;
916     dri_bo *ss2_bo;
917     unsigned int tiling;
918     unsigned int swizzle;
919
920     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
921     ss2_bo = pp_context->surface_state_binding_table.bo;
922     assert(ss2_bo);
923
924     dri_bo_map(ss2_bo, True);
925     assert(ss2_bo->virtual);
926     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
927     memset(ss2, 0, sizeof(*ss2));
928     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
929     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
930     ss2->ss1.width = width - 1;
931     ss2->ss1.height = height - 1;
932     ss2->ss2.pitch = wpitch - 1;
933     ss2->ss2.interleave_chroma = interleave_chroma;
934     ss2->ss2.surface_format = format;
935     ss2->ss3.x_offset_for_cb = xoffset;
936     ss2->ss3.y_offset_for_cb = yoffset;
937     pp_set_surface2_tiling(ss2, tiling);
938     dri_bo_emit_reloc(ss2_bo,
939                       I915_GEM_DOMAIN_RENDER, 0,
940                       surf_bo_offset,
941                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
942                       surf_bo);
943     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
944     dri_bo_unmap(ss2_bo);
945 }
946
947 static void
948 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
949                           dri_bo *surf_bo, unsigned long surf_bo_offset,
950                           int width, int height, int pitch, int format, 
951                           int index, int is_target)
952 {
953     struct gen7_surface_state *ss;
954     dri_bo *ss_bo;
955     unsigned int tiling;
956     unsigned int swizzle;
957
958     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
959     ss_bo = pp_context->surface_state_binding_table.bo;
960     assert(ss_bo);
961
962     dri_bo_map(ss_bo, True);
963     assert(ss_bo->virtual);
964     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
965     memset(ss, 0, sizeof(*ss));
966     ss->ss0.surface_type = I965_SURFACE_2D;
967     ss->ss0.surface_format = format;
968     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
969     ss->ss2.width = width - 1;
970     ss->ss2.height = height - 1;
971     ss->ss3.pitch = pitch - 1;
972     gen7_pp_set_surface_tiling(ss, tiling);
973     dri_bo_emit_reloc(ss_bo,
974                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
975                       surf_bo_offset,
976                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
977                       surf_bo);
978     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
979     dri_bo_unmap(ss_bo);
980 }
981
982 static void
983 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
984                            dri_bo *surf_bo, unsigned long surf_bo_offset,
985                            int width, int height, int wpitch,
986                            int xoffset, int yoffset,
987                            int format, int interleave_chroma,
988                            int index)
989 {
990     struct gen7_surface_state2 *ss2;
991     dri_bo *ss2_bo;
992     unsigned int tiling;
993     unsigned int swizzle;
994
995     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
996     ss2_bo = pp_context->surface_state_binding_table.bo;
997     assert(ss2_bo);
998
999     dri_bo_map(ss2_bo, True);
1000     assert(ss2_bo->virtual);
1001     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1002     memset(ss2, 0, sizeof(*ss2));
1003     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1004     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1005     ss2->ss1.width = width - 1;
1006     ss2->ss1.height = height - 1;
1007     ss2->ss2.pitch = wpitch - 1;
1008     ss2->ss2.interleave_chroma = interleave_chroma;
1009     ss2->ss2.surface_format = format;
1010     ss2->ss3.x_offset_for_cb = xoffset;
1011     ss2->ss3.y_offset_for_cb = yoffset;
1012     gen7_pp_set_surface2_tiling(ss2, tiling);
1013     dri_bo_emit_reloc(ss2_bo,
1014                       I915_GEM_DOMAIN_RENDER, 0,
1015                       surf_bo_offset,
1016                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1017                       surf_bo);
1018     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1019     dri_bo_unmap(ss2_bo);
1020 }
1021
1022 static void 
1023 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1024                                 const struct i965_surface *surface, 
1025                                 int base_index, int is_target,
1026                                 int *width, int *height, int *pitch, int *offset)
1027 {
1028     struct i965_driver_data *i965 = i965_driver_data(ctx);
1029     struct object_surface *obj_surface;
1030     struct object_image *obj_image;
1031     dri_bo *bo;
1032     int fourcc = pp_get_surface_fourcc(ctx, surface);
1033     const int Y = 0;
1034     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1035     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1036     const int UV = 1;
1037     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1038
1039     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1040         obj_surface = SURFACE(surface->id);
1041         bo = obj_surface->bo;
1042         width[0] = obj_surface->orig_width;
1043         height[0] = obj_surface->orig_height;
1044         pitch[0] = obj_surface->width;
1045         offset[0] = 0;
1046
1047         if (interleaved_uv) {
1048             width[1] = obj_surface->orig_width;
1049             height[1] = obj_surface->orig_height / 2;
1050             pitch[1] = obj_surface->width;
1051             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1052         } else {
1053             width[1] = obj_surface->orig_width / 2;
1054             height[1] = obj_surface->orig_height / 2;
1055             pitch[1] = obj_surface->width / 2;
1056             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1057             width[2] = obj_surface->orig_width / 2;
1058             height[2] = obj_surface->orig_height / 2;
1059             pitch[2] = obj_surface->width / 2;
1060             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1061         }
1062     } else {
1063         obj_image = IMAGE(surface->id);
1064         bo = obj_image->bo;
1065         width[0] = obj_image->image.width;
1066         height[0] = obj_image->image.height;
1067         pitch[0] = obj_image->image.pitches[0];
1068         offset[0] = obj_image->image.offsets[0];
1069
1070         if (interleaved_uv) {
1071             width[1] = obj_image->image.width;
1072             height[1] = obj_image->image.height / 2;
1073             pitch[1] = obj_image->image.pitches[1];
1074             offset[1] = obj_image->image.offsets[1];
1075         } else {
1076             width[1] = obj_image->image.width / 2;
1077             height[1] = obj_image->image.height / 2;
1078             pitch[1] = obj_image->image.pitches[1];
1079             offset[1] = obj_image->image.offsets[1];
1080             width[2] = obj_image->image.width / 2;
1081             height[2] = obj_image->image.height / 2;
1082             pitch[2] = obj_image->image.pitches[2];
1083             offset[2] = obj_image->image.offsets[2];
1084         }
1085     }
1086
1087     /* Y surface */
1088     i965_pp_set_surface_state(ctx, pp_context,
1089                               bo, offset[Y],
1090                               width[Y] / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1091                               base_index, is_target);
1092
1093     if (interleaved_uv) {
1094         i965_pp_set_surface_state(ctx, pp_context,
1095                                   bo, offset[UV],
1096                                   width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1097                                   base_index + 1, is_target);
1098     } else {
1099         /* U surface */
1100         i965_pp_set_surface_state(ctx, pp_context,
1101                                   bo, offset[U],
1102                                   width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1103                                   base_index + 1, is_target);
1104
1105         /* V surface */
1106         i965_pp_set_surface_state(ctx, pp_context,
1107                                   bo, offset[V],
1108                                   width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1109                                   base_index + 2, is_target);
1110     }
1111
1112 }
1113
1114 static void 
1115 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1116                                      const struct i965_surface *surface, 
1117                                      int base_index, int is_target,
1118                                      int *width, int *height, int *pitch, int *offset)
1119 {
1120     struct i965_driver_data *i965 = i965_driver_data(ctx);
1121     struct object_surface *obj_surface;
1122     struct object_image *obj_image;
1123     dri_bo *bo;
1124     int fourcc = pp_get_surface_fourcc(ctx, surface);
1125     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1126                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1127     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1128                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1129     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1130
1131     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1132         obj_surface = SURFACE(surface->id);
1133         bo = obj_surface->bo;
1134         width[0] = obj_surface->orig_width;
1135         height[0] = obj_surface->orig_height;
1136         pitch[0] = obj_surface->width;
1137         offset[0] = 0;
1138
1139         width[1] = obj_surface->cb_cr_width;
1140         height[1] = obj_surface->cb_cr_height;
1141         pitch[1] = obj_surface->cb_cr_pitch;
1142         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1143
1144         width[2] = obj_surface->cb_cr_width;
1145         height[2] = obj_surface->cb_cr_height;
1146         pitch[2] = obj_surface->cb_cr_pitch;
1147         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1148     } else {
1149         obj_image = IMAGE(surface->id);
1150         bo = obj_image->bo;
1151         width[0] = obj_image->image.width;
1152         height[0] = obj_image->image.height;
1153         pitch[0] = obj_image->image.pitches[0];
1154         offset[0] = obj_image->image.offsets[0];
1155
1156         if (interleaved_uv) {
1157             width[1] = obj_image->image.width;
1158             height[1] = obj_image->image.height / 2;
1159             pitch[1] = obj_image->image.pitches[1];
1160             offset[1] = obj_image->image.offsets[1];
1161         } else {
1162             width[1] = obj_image->image.width / 2;
1163             height[1] = obj_image->image.height / 2;
1164             pitch[1] = obj_image->image.pitches[U];
1165             offset[1] = obj_image->image.offsets[U];
1166             width[2] = obj_image->image.width / 2;
1167             height[2] = obj_image->image.height / 2;
1168             pitch[2] = obj_image->image.pitches[V];
1169             offset[2] = obj_image->image.offsets[V];
1170         }
1171     }
1172
1173     if (is_target) {
1174         gen7_pp_set_surface_state(ctx, pp_context,
1175                                   bo, 0,
1176                                   width[0] / 4, height[0], pitch[0],
1177                                   I965_SURFACEFORMAT_R8_SINT,
1178                                   base_index, 1);
1179
1180         if (interleaved_uv) {
1181             gen7_pp_set_surface_state(ctx, pp_context,
1182                                       bo, offset[1],
1183                                       width[1] / 2, height[1], pitch[1],
1184                                       I965_SURFACEFORMAT_R8G8_SINT,
1185                                       base_index + 1, 1);
1186         } else {
1187             gen7_pp_set_surface_state(ctx, pp_context,
1188                                       bo, offset[1],
1189                                       width[1] / 4, height[1], pitch[1],
1190                                       I965_SURFACEFORMAT_R8_SINT,
1191                                       base_index + 1, 1);
1192             gen7_pp_set_surface_state(ctx, pp_context,
1193                                       bo, offset[2],
1194                                       width[2] / 4, height[2], pitch[2],
1195                                       I965_SURFACEFORMAT_R8_SINT,
1196                                       base_index + 2, 1);
1197         }
1198     } else {
1199         gen7_pp_set_surface2_state(ctx, pp_context,
1200                                    bo, offset[0],
1201                                    width[0], height[0], pitch[0],
1202                                    0, 0,
1203                                    SURFACE_FORMAT_Y8_UNORM, 0,
1204                                    base_index);
1205
1206         if (interleaved_uv) {
1207             gen7_pp_set_surface2_state(ctx, pp_context,
1208                                        bo, offset[1],
1209                                        width[1], height[1], pitch[1],
1210                                        0, 0,
1211                                        SURFACE_FORMAT_R8B8_UNORM, 0,
1212                                        base_index + 1);
1213         } else {
1214             gen7_pp_set_surface2_state(ctx, pp_context,
1215                                        bo, offset[1],
1216                                        width[1], height[1], pitch[1],
1217                                        0, 0,
1218                                        SURFACE_FORMAT_R8_UNORM, 0,
1219                                        base_index + 1);
1220             gen7_pp_set_surface2_state(ctx, pp_context,
1221                                        bo, offset[2],
1222                                        width[2], height[2], pitch[2],
1223                                        0, 0,
1224                                        SURFACE_FORMAT_R8_UNORM, 0,
1225                                        base_index + 2);
1226         }
1227     }
1228 }
1229
/* Horizontal step count of the null module: always 1; the argument is ignored. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1235
/* Vertical step count of the null module: always 1; the argument is ignored. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1241
/* Per-block hook of the null module: touches nothing and returns 0. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1247
1248 static VAStatus
1249 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1250                    const struct i965_surface *src_surface,
1251                    const VARectangle *src_rect,
1252                    struct i965_surface *dst_surface,
1253                    const VARectangle *dst_rect,
1254                    void *filter_param)
1255 {
1256     /* private function & data */
1257     pp_context->pp_x_steps = pp_null_x_steps;
1258     pp_context->pp_y_steps = pp_null_y_steps;
1259     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1260
1261     dst_surface->flags = src_surface->flags;
1262
1263     return VA_STATUS_SUCCESS;
1264 }
1265
/* Horizontal step count of the load/save module: always 1; the argument is ignored. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1271
1272 static int
1273 pp_load_save_y_steps(void *private_context)
1274 {
1275     struct pp_load_save_context *pp_load_save_context = private_context;
1276
1277     return pp_load_save_context->dest_h / 8;
1278 }
1279
1280 static int
1281 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1282 {
1283     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1284
1285     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1286     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1287     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
1288     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
1289
1290     return 0;
1291 }
1292
1293 static VAStatus
1294 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1295                                 const struct i965_surface *src_surface,
1296                                 const VARectangle *src_rect,
1297                                 struct i965_surface *dst_surface,
1298                                 const VARectangle *dst_rect,
1299                                 void *filter_param)
1300 {
1301     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1302     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1303     int width[3], height[3], pitch[3], offset[3];
1304     const int Y = 0;
1305
1306     /* source surface */
1307     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
1308                                     width, height, pitch, offset);
1309
1310     /* destination surface */
1311     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
1312                                     width, height, pitch, offset);
1313
1314     /* private function & data */
1315     pp_context->pp_x_steps = pp_load_save_x_steps;
1316     pp_context->pp_y_steps = pp_load_save_y_steps;
1317     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
1318     pp_load_save_context->dest_h = ALIGN(height[Y], 16);
1319     pp_load_save_context->dest_w = ALIGN(width[Y], 16);
1320
1321     pp_inline_parameter->grf5.block_count_x = ALIGN(width[Y], 16) / 16;   /* 1 x N */
1322     pp_inline_parameter->grf5.number_blocks = ALIGN(width[Y], 16) / 16;
1323
1324     dst_surface->flags = src_surface->flags;
1325
1326     return VA_STATUS_SUCCESS;
1327 }
1328
/* Horizontal step count of the scaling module: always 1; the argument is ignored. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1334
1335 static int
1336 pp_scaling_y_steps(void *private_context)
1337 {
1338     struct pp_scaling_context *pp_scaling_context = private_context;
1339
1340     return pp_scaling_context->dest_h / 8;
1341 }
1342
1343 static int
1344 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1345 {
1346     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1347     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1348     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1349     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1350     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1351
1352     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
1353     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
1354     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
1355     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
1356     
1357     return 0;
1358 }
1359
1360 static VAStatus
1361 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1362                            const struct i965_surface *src_surface,
1363                            const VARectangle *src_rect,
1364                            struct i965_surface *dst_surface,
1365                            const VARectangle *dst_rect,
1366                            void *filter_param)
1367 {
1368     struct i965_driver_data *i965 = i965_driver_data(ctx);
1369     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1370     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1371     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1372     struct object_surface *obj_surface;
1373     struct i965_sampler_state *sampler_state;
1374     int in_w, in_h, in_wpitch, in_hpitch;
1375     int out_w, out_h, out_wpitch, out_hpitch;
1376
1377     /* source surface */
1378     obj_surface = SURFACE(src_surface->id);
1379     in_w = obj_surface->orig_width;
1380     in_h = obj_surface->orig_height;
1381     in_wpitch = obj_surface->width;
1382     in_hpitch = obj_surface->height;
1383
1384     /* source Y surface index 1 */
1385     i965_pp_set_surface_state(ctx, pp_context,
1386                               obj_surface->bo, 0,
1387                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1388                               1, 0);
1389
1390     /* source UV surface index 2 */
1391     i965_pp_set_surface_state(ctx, pp_context,
1392                               obj_surface->bo, in_wpitch * in_hpitch,
1393                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1394                               2, 0);
1395
1396     /* destination surface */
1397     obj_surface = SURFACE(dst_surface->id);
1398     out_w = obj_surface->orig_width;
1399     out_h = obj_surface->orig_height;
1400     out_wpitch = obj_surface->width;
1401     out_hpitch = obj_surface->height;
1402
1403     /* destination Y surface index 7 */
1404     i965_pp_set_surface_state(ctx, pp_context,
1405                               obj_surface->bo, 0,
1406                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1407                               7, 1);
1408
1409     /* destination UV surface index 8 */
1410     i965_pp_set_surface_state(ctx, pp_context,
1411                               obj_surface->bo, out_wpitch * out_hpitch,
1412                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1413                               8, 1);
1414
1415     /* sampler state */
1416     dri_bo_map(pp_context->sampler_state_table.bo, True);
1417     assert(pp_context->sampler_state_table.bo->virtual);
1418     sampler_state = pp_context->sampler_state_table.bo->virtual;
1419
1420     /* SIMD16 Y index 1 */
1421     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1422     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1423     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1424     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1425     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1426
1427     /* SIMD16 UV index 2 */
1428     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1429     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1430     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1431     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1432     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1433
1434     dri_bo_unmap(pp_context->sampler_state_table.bo);
1435
1436     /* private function & data */
1437     pp_context->pp_x_steps = pp_scaling_x_steps;
1438     pp_context->pp_y_steps = pp_scaling_y_steps;
1439     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1440
1441     pp_scaling_context->dest_x = dst_rect->x;
1442     pp_scaling_context->dest_y = dst_rect->y;
1443     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
1444     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
1445     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w;
1446     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
1447
1448     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1449
1450     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
1451     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1452     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
1453     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1454     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1455
1456     dst_surface->flags = src_surface->flags;
1457
1458     return VA_STATUS_SUCCESS;
1459 }
1460
1461 static int
1462 pp_avs_x_steps(void *private_context)
1463 {
1464     struct pp_avs_context *pp_avs_context = private_context;
1465
1466     return pp_avs_context->dest_w / 16;
1467 }
1468
/* Vertical step count of the AVS module: always 1; the argument is ignored. */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1474
1475 static int
1476 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1477 {
1478     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1479     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1480     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1481     float src_x_steping, src_y_steping, video_step_delta;
1482     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1483
1484     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
1485         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1486         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
1487     } else if (tmp_w >= pp_avs_context->dest_w) {
1488         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1489         pp_inline_parameter->grf6.video_step_delta = 0;
1490         
1491         if (x == 0) {
1492             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1493                 pp_avs_context->src_normalized_x;
1494         } else {
1495             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1496             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1497             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1498                 16 * 15 * video_step_delta / 2;
1499         }
1500     } else {
1501         int n0, n1, n2, nls_left, nls_right;
1502         int factor_a = 5, factor_b = 4;
1503         float f;
1504
1505         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1506         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1507         n2 = tmp_w / (16 * factor_a);
1508         nls_left = n0 + n2;
1509         nls_right = n1 + n2;
1510         f = (float) n2 * 16 / tmp_w;
1511         
1512         if (n0 < 5) {
1513             pp_inline_parameter->grf6.video_step_delta = 0.0;
1514
1515             if (x == 0) {
1516                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1517                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1518             } else {
1519                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1520                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1521                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1522                     16 * 15 * video_step_delta / 2;
1523             }
1524         } else {
1525             if (x < nls_left) {
1526                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1527                 float a = f / (nls_left * 16 * factor_b);
1528                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1529                 
1530                 pp_inline_parameter->grf6.video_step_delta = b;
1531
1532                 if (x == 0) {
1533                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1534                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
1535                 } else {
1536                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1537                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1538                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1539                         16 * 15 * video_step_delta / 2;
1540                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
1541                 }
1542             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1543                 /* scale the center linearly */
1544                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1545                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1546                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1547                     16 * 15 * video_step_delta / 2;
1548                 pp_inline_parameter->grf6.video_step_delta = 0.0;
1549                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1550             } else {
1551                 float a = f / (nls_right * 16 * factor_b);
1552                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1553
1554                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1555                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1556                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1557                     16 * 15 * video_step_delta / 2;
1558                 pp_inline_parameter->grf6.video_step_delta = -b;
1559
1560                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1561                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1562                 else
1563                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
1564             }
1565         }
1566     }
1567
1568     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1569     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
1570     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1571     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
1572
1573     return 0;
1574 }
1575
1576 static VAStatus
1577 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1578                        const struct i965_surface *src_surface,
1579                        const VARectangle *src_rect,
1580                        struct i965_surface *dst_surface,
1581                        const VARectangle *dst_rect,
1582                        void *filter_param,
1583                        int nlas)
1584 {
1585     struct i965_driver_data *i965 = i965_driver_data(ctx);
1586     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1587     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1588     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1589     struct object_surface *obj_surface;
1590     struct i965_sampler_8x8 *sampler_8x8;
1591     struct i965_sampler_8x8_state *sampler_8x8_state;
1592     int index;
1593     int in_w, in_h, in_wpitch, in_hpitch;
1594     int out_w, out_h, out_wpitch, out_hpitch;
1595     int i;
1596
1597     /* surface */
1598     obj_surface = SURFACE(src_surface->id);
1599     in_w = obj_surface->orig_width;
1600     in_h = obj_surface->orig_height;
1601     in_wpitch = obj_surface->width;
1602     in_hpitch = obj_surface->height;
1603
1604     /* source Y surface index 1 */
1605     i965_pp_set_surface2_state(ctx, pp_context,
1606                                obj_surface->bo, 0,
1607                                in_w, in_h, in_wpitch,
1608                                0, 0,
1609                                SURFACE_FORMAT_Y8_UNORM, 0,
1610                                1);
1611
1612     /* source UV surface index 2 */
1613     i965_pp_set_surface2_state(ctx, pp_context,
1614                                obj_surface->bo, in_wpitch * in_hpitch,
1615                                in_w / 2, in_h / 2, in_wpitch,
1616                                0, 0,
1617                                SURFACE_FORMAT_R8B8_UNORM, 0,
1618                                2);
1619
1620     /* destination surface */
1621     obj_surface = SURFACE(dst_surface->id);
1622     out_w = obj_surface->orig_width;
1623     out_h = obj_surface->orig_height;
1624     out_wpitch = obj_surface->width;
1625     out_hpitch = obj_surface->height;
1626     assert(out_w <= out_wpitch && out_h <= out_hpitch);
1627
1628     /* destination Y surface index 7 */
1629     i965_pp_set_surface_state(ctx, pp_context,
1630                               obj_surface->bo, 0,
1631                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1632                               7, 1);
1633
1634     /* destination UV surface index 8 */
1635     i965_pp_set_surface_state(ctx, pp_context,
1636                               obj_surface->bo, out_wpitch * out_hpitch,
1637                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1638                               8, 1);
1639
1640     /* sampler 8x8 state */
1641     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1642     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1643     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1644     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1645     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1646
1647     for (i = 0; i < 17; i++) {
1648         /* for Y channel, currently ignore */
1649         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
1650         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
1651         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
1652         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
1653         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
1654         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
1655         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
1656         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
1657         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
1658         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
1659         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
1660         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
1661         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
1662         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
1663         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
1664         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
1665         /* for U/V channel, 0.25 */
1666         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
1667         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
1668         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
1669         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
1670         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
1671         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
1672         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
1673         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
1674         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
1675         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
1676         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
1677         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
1678         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
1679         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
1680         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
1681         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
1682     }
1683
1684     sampler_8x8_state->dw136.default_sharpness_level = 0;
1685     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1686     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1687     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1688     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1689
1690     /* sampler 8x8 */
1691     dri_bo_map(pp_context->sampler_state_table.bo, True);
1692     assert(pp_context->sampler_state_table.bo->virtual);
1693     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1694     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1695
1696     /* sample_8x8 Y index 1 */
1697     index = 1;
1698     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1699     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1700     sampler_8x8[index].dw0.ief_bypass = 1;
1701     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1702     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1703     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1704     sampler_8x8[index].dw2.global_noise_estimation = 22;
1705     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1706     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1707     sampler_8x8[index].dw3.strong_edge_weight = 7;
1708     sampler_8x8[index].dw3.regular_weight = 2;
1709     sampler_8x8[index].dw3.non_edge_weight = 0;
1710     sampler_8x8[index].dw3.gain_factor = 40;
1711     sampler_8x8[index].dw4.steepness_boost = 0;
1712     sampler_8x8[index].dw4.steepness_threshold = 0;
1713     sampler_8x8[index].dw4.mr_boost = 0;
1714     sampler_8x8[index].dw4.mr_threshold = 5;
1715     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1716     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1717     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1718     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1719     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1720     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1721     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1722     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1723     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1724     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1725     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1726     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1727     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1728     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1729     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1730     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1731     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1732     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1733     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1734     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1735     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1736     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1737     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1738     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1739     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1740     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1741     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1742     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1743     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1744     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1745     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1746     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1747     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1748     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1749     sampler_8x8[index].dw13.limiter_boost = 0;
1750     sampler_8x8[index].dw13.minimum_limiter = 10;
1751     sampler_8x8[index].dw13.maximum_limiter = 11;
1752     sampler_8x8[index].dw14.clip_limiter = 130;
1753     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1754                       I915_GEM_DOMAIN_RENDER, 
1755                       0,
1756                       0,
1757                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1758                       pp_context->sampler_state_table.bo_8x8);
1759
1760     /* sample_8x8 UV index 2 */
1761     index = 2;
1762     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1763     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1764     sampler_8x8[index].dw0.ief_bypass = 1;
1765     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1766     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1767     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1768     sampler_8x8[index].dw2.global_noise_estimation = 22;
1769     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1770     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1771     sampler_8x8[index].dw3.strong_edge_weight = 7;
1772     sampler_8x8[index].dw3.regular_weight = 2;
1773     sampler_8x8[index].dw3.non_edge_weight = 0;
1774     sampler_8x8[index].dw3.gain_factor = 40;
1775     sampler_8x8[index].dw4.steepness_boost = 0;
1776     sampler_8x8[index].dw4.steepness_threshold = 0;
1777     sampler_8x8[index].dw4.mr_boost = 0;
1778     sampler_8x8[index].dw4.mr_threshold = 5;
1779     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1780     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1781     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1782     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1783     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1784     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1785     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1786     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1787     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1788     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1789     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1790     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1791     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1792     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1793     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1794     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1795     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1796     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1797     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1798     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1799     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1800     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1801     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1802     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1803     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1804     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1805     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1806     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1807     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1808     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1809     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1810     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1811     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1812     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1813     sampler_8x8[index].dw13.limiter_boost = 0;
1814     sampler_8x8[index].dw13.minimum_limiter = 10;
1815     sampler_8x8[index].dw13.maximum_limiter = 11;
1816     sampler_8x8[index].dw14.clip_limiter = 130;
1817     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1818                       I915_GEM_DOMAIN_RENDER, 
1819                       0,
1820                       0,
1821                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1822                       pp_context->sampler_state_table.bo_8x8);
1823
1824     dri_bo_unmap(pp_context->sampler_state_table.bo);
1825
1826     /* private function & data */
1827     pp_context->pp_x_steps = pp_avs_x_steps;
1828     pp_context->pp_y_steps = pp_avs_y_steps;
1829     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1830
1831     pp_avs_context->dest_x = dst_rect->x;
1832     pp_avs_context->dest_y = dst_rect->y;
1833     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
1834     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
1835     pp_avs_context->src_normalized_x = (float)src_rect->x / in_w;
1836     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
1837     pp_avs_context->src_w = src_rect->width;
1838     pp_avs_context->src_h = src_rect->height;
1839
1840     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
1841     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1842
1843     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
1844     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
1845     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
1846     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1847     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1848     pp_inline_parameter->grf6.video_step_delta = 0.0;
1849
1850     dst_surface->flags = src_surface->flags;
1851
1852     return VA_STATUS_SUCCESS;
1853 }
1854
1855 static VAStatus
1856 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1857                             const struct i965_surface *src_surface,
1858                             const VARectangle *src_rect,
1859                             struct i965_surface *dst_surface,
1860                             const VARectangle *dst_rect,
1861                             void *filter_param)
1862 {
1863     return pp_nv12_avs_initialize(ctx, pp_context,
1864                                   src_surface,
1865                                   src_rect,
1866                                   dst_surface,
1867                                   dst_rect,
1868                                   filter_param,
1869                                   1);
1870 }
1871
1872 static VAStatus
1873 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1874                              const struct i965_surface *src_surface,
1875                              const VARectangle *src_rect,
1876                              struct i965_surface *dst_surface,
1877                              const VARectangle *dst_rect,
1878                              void *filter_param)
1879 {
1880     return pp_nv12_avs_initialize(ctx, pp_context,
1881                                   src_surface,
1882                                   src_rect,
1883                                   dst_surface,
1884                                   dst_rect,
1885                                   filter_param,
1886                                   0);    
1887 }
1888
1889 static int
1890 gen7_pp_avs_x_steps(void *private_context)
1891 {
1892     struct pp_avs_context *pp_avs_context = private_context;
1893
1894     return pp_avs_context->dest_w / 16;
1895 }
1896
1897 static int
1898 gen7_pp_avs_y_steps(void *private_context)
1899 {
1900     struct pp_avs_context *pp_avs_context = private_context;
1901
1902     return pp_avs_context->dest_h / 16;
1903 }
1904
1905 static int
1906 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1907 {
1908     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1909     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1910
1911     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1912     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
1913     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
1914     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
1915
1916     return 0;
1917 }
1918
1919 static VAStatus
1920 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1921                            const struct i965_surface *src_surface,
1922                            const VARectangle *src_rect,
1923                            struct i965_surface *dst_surface,
1924                            const VARectangle *dst_rect,
1925                            void *filter_param)
1926 {
1927     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1928     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1929     struct gen7_sampler_8x8 *sampler_8x8;
1930     struct i965_sampler_8x8_state *sampler_8x8_state;
1931     int index, i;
1932     int width[3], height[3], pitch[3], offset[3];
1933
1934     /* source surface */
1935     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
1936                                          width, height, pitch, offset);
1937
1938     /* destination surface */
1939     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
1940                                          width, height, pitch, offset);
1941
1942     /* sampler 8x8 state */
1943     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1944     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1945     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1946     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1947     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1948
1949     for (i = 0; i < 17; i++) {
1950         /* for Y channel, currently ignore */
1951         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
1952         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
1953         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
1954         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
1955         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
1956         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
1957         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
1958         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
1959         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
1960         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
1961         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
1962         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
1963         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
1964         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
1965         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
1966         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
1967         /* for U/V channel, 0.25 */
1968         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
1969         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
1970         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
1971         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
1972         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
1973         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
1974         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
1975         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
1976         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
1977         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
1978         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
1979         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
1980         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
1981         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
1982         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
1983         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
1984     }
1985
1986     sampler_8x8_state->dw136.default_sharpness_level = 0;
1987     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1988     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1989     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1990     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1991
1992     /* sampler 8x8 */
1993     dri_bo_map(pp_context->sampler_state_table.bo, True);
1994     assert(pp_context->sampler_state_table.bo->virtual);
1995     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
1996     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1997
1998     /* sample_8x8 Y index 4 */
1999     index = 4;
2000     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2001     sampler_8x8[index].dw0.global_noise_estimation = 255;
2002     sampler_8x8[index].dw0.ief_bypass = 1;
2003
2004     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2005
2006     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2007     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2008     sampler_8x8[index].dw2.r5x_coefficient = 9;
2009     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2010     sampler_8x8[index].dw2.r5c_coefficient = 3;
2011
2012     sampler_8x8[index].dw3.r3x_coefficient = 27;
2013     sampler_8x8[index].dw3.r3c_coefficient = 5;
2014     sampler_8x8[index].dw3.gain_factor = 40;
2015     sampler_8x8[index].dw3.non_edge_weight = 1;
2016     sampler_8x8[index].dw3.regular_weight = 2;
2017     sampler_8x8[index].dw3.strong_edge_weight = 7;
2018     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2019
2020     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2021                       I915_GEM_DOMAIN_RENDER, 
2022                       0,
2023                       0,
2024                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2025                       pp_context->sampler_state_table.bo_8x8);
2026
2027     /* sample_8x8 UV index 8 */
2028     index = 8;
2029     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2030     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2031     sampler_8x8[index].dw0.global_noise_estimation = 255;
2032     sampler_8x8[index].dw0.ief_bypass = 1;
2033     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2034     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2035     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2036     sampler_8x8[index].dw2.r5x_coefficient = 9;
2037     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2038     sampler_8x8[index].dw2.r5c_coefficient = 3;
2039     sampler_8x8[index].dw3.r3x_coefficient = 27;
2040     sampler_8x8[index].dw3.r3c_coefficient = 5;
2041     sampler_8x8[index].dw3.gain_factor = 40;
2042     sampler_8x8[index].dw3.non_edge_weight = 1;
2043     sampler_8x8[index].dw3.regular_weight = 2;
2044     sampler_8x8[index].dw3.strong_edge_weight = 7;
2045     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2046
2047     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2048                       I915_GEM_DOMAIN_RENDER, 
2049                       0,
2050                       0,
2051                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2052                       pp_context->sampler_state_table.bo_8x8);
2053
2054     /* sampler_8x8 V, index 12 */
2055     index = 12;
2056     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2057     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2058     sampler_8x8[index].dw0.global_noise_estimation = 255;
2059     sampler_8x8[index].dw0.ief_bypass = 1;
2060     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2061     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2062     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2063     sampler_8x8[index].dw2.r5x_coefficient = 9;
2064     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2065     sampler_8x8[index].dw2.r5c_coefficient = 3;
2066     sampler_8x8[index].dw3.r3x_coefficient = 27;
2067     sampler_8x8[index].dw3.r3c_coefficient = 5;
2068     sampler_8x8[index].dw3.gain_factor = 40;
2069     sampler_8x8[index].dw3.non_edge_weight = 1;
2070     sampler_8x8[index].dw3.regular_weight = 2;
2071     sampler_8x8[index].dw3.strong_edge_weight = 7;
2072     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2073
2074     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2075                       I915_GEM_DOMAIN_RENDER, 
2076                       0,
2077                       0,
2078                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2079                       pp_context->sampler_state_table.bo_8x8);
2080
2081     dri_bo_unmap(pp_context->sampler_state_table.bo);
2082
2083     /* private function & data */
2084     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2085     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2086     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2087
2088     pp_avs_context->dest_x = dst_rect->x;
2089     pp_avs_context->dest_y = dst_rect->y;
2090     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2091     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2092     pp_avs_context->src_w = src_rect->width;
2093     pp_avs_context->src_h = src_rect->height;
2094
2095     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2096     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / pp_avs_context->dest_w;
2097     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) 1.0 / pp_avs_context->dest_h;
2098     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2099     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / pp_avs_context->dest_w;
2100
2101     dst_surface->flags = src_surface->flags;
2102
2103     return VA_STATUS_SUCCESS;
2104 }
2105
/*
 * Horizontal step count for the DNDI path: always a single step.
 * private_context is accepted for the callback signature and ignored.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2111
2112 static int
2113 pp_dndi_y_steps(void *private_context)
2114 {
2115     struct pp_dndi_context *pp_dndi_context = private_context;
2116
2117     return pp_dndi_context->dest_h / 4;
2118 }
2119
2120 static int
2121 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2122 {
2123     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2124
2125     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2126     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2127
2128     return 0;
2129 }
2130
2131 static VAStatus
2132 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2133                         const struct i965_surface *src_surface,
2134                         const VARectangle *src_rect,
2135                         struct i965_surface *dst_surface,
2136                         const VARectangle *dst_rect,
2137                         void *filter_param)
2138 {
2139     struct i965_driver_data *i965 = i965_driver_data(ctx);
2140     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2141     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2142     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2143     struct object_surface *obj_surface;
2144     struct i965_sampler_dndi *sampler_dndi;
2145     int index;
2146     int w, h;
2147     int orig_w, orig_h;
2148     int dndi_top_first = 1;
2149
2150     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2151         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2152
2153     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2154         dndi_top_first = 1;
2155     else
2156         dndi_top_first = 0;
2157
2158     /* surface */
2159     obj_surface = SURFACE(src_surface->id);
2160     orig_w = obj_surface->orig_width;
2161     orig_h = obj_surface->orig_height;
2162     w = obj_surface->width;
2163     h = obj_surface->height;
2164
2165     if (pp_context->stmm.bo == NULL) {
2166         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2167                                            "STMM surface",
2168                                            w * h,
2169                                            4096);
2170         assert(pp_context->stmm.bo);
2171     }
2172
2173     /* source UV surface index 2 */
2174     i965_pp_set_surface_state(ctx, pp_context,
2175                               obj_surface->bo, w * h,
2176                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2177                               2, 0);
2178
2179     /* source YUV surface index 4 */
2180     i965_pp_set_surface2_state(ctx, pp_context,
2181                                obj_surface->bo, 0,
2182                                orig_w, orig_h, w,
2183                                0, h,
2184                                SURFACE_FORMAT_PLANAR_420_8, 1,
2185                                4);
2186
2187     /* source STMM surface index 20 */
2188     i965_pp_set_surface_state(ctx, pp_context,
2189                               pp_context->stmm.bo, 0,
2190                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2191                               20, 1);
2192
2193     /* destination surface */
2194     obj_surface = SURFACE(dst_surface->id);
2195     orig_w = obj_surface->orig_width;
2196     orig_h = obj_surface->orig_height;
2197     w = obj_surface->width;
2198     h = obj_surface->height;
2199
2200     /* destination Y surface index 7 */
2201     i965_pp_set_surface_state(ctx, pp_context,
2202                               obj_surface->bo, 0,
2203                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2204                               7, 1);
2205
2206     /* destination UV surface index 8 */
2207     i965_pp_set_surface_state(ctx, pp_context,
2208                               obj_surface->bo, w * h,
2209                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2210                               8, 1);
2211     /* sampler dndi */
2212     dri_bo_map(pp_context->sampler_state_table.bo, True);
2213     assert(pp_context->sampler_state_table.bo->virtual);
2214     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2215     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2216
2217     /* sample dndi index 1 */
2218     index = 0;
2219     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2220     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2221     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2222     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2223
2224     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2225     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
2226     sampler_dndi[index].dw1.stmm_c2 = 1;
2227     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2228     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2229
2230     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2231     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2232     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2233     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
2234
2235     sampler_dndi[index].dw3.maximum_stmm = 128;
2236     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2237     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2238     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2239     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2240
2241     sampler_dndi[index].dw4.sdi_delta = 8;
2242     sampler_dndi[index].dw4.sdi_threshold = 128;
2243     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2244     sampler_dndi[index].dw4.stmm_shift_up = 0;
2245     sampler_dndi[index].dw4.stmm_shift_down = 0;
2246     sampler_dndi[index].dw4.minimum_stmm = 0;
2247
2248     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
2249     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
2250     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
2251     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
2252
2253     sampler_dndi[index].dw6.dn_enable = 1;
2254     sampler_dndi[index].dw6.di_enable = 1;
2255     sampler_dndi[index].dw6.di_partial = 0;
2256     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2257     sampler_dndi[index].dw6.dndi_stream_id = 0;
2258     sampler_dndi[index].dw6.dndi_first_frame = 1;
2259     sampler_dndi[index].dw6.progressive_dn = 0;
2260     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
2261     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2262     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2263
2264     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2265     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2266     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2267     sampler_dndi[index].dw7.column_width_minus1 = 0;
2268
2269     dri_bo_unmap(pp_context->sampler_state_table.bo);
2270
2271     /* private function & data */
2272     pp_context->pp_x_steps = pp_dndi_x_steps;
2273     pp_context->pp_y_steps = pp_dndi_y_steps;
2274     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
2275
2276     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2277     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
2278     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
2279     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
2280
2281     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2282     pp_inline_parameter->grf5.number_blocks = w / 16;
2283     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2284     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2285
2286     pp_dndi_context->dest_w = w;
2287     pp_dndi_context->dest_h = h;
2288
2289     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2290
2291     return VA_STATUS_SUCCESS;
2292 }
2293
/*
 * Number of horizontal steps for the denoise pass.
 *
 * The private context is not consulted: the pass always reports a single
 * horizontal step.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;      /* intentionally unused */

    return 1;
}
2299
2300 static int
2301 pp_dn_y_steps(void *private_context)
2302 {
2303     struct pp_dn_context *pp_dn_context = private_context;
2304
2305     return pp_dn_context->dest_h / 8;
2306 }
2307
2308 static int
2309 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2310 {
2311     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2312
2313     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2314     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
2315
2316     return 0;
2317 }
2318
2319 static VAStatus
2320 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2321                       const struct i965_surface *src_surface,
2322                       const VARectangle *src_rect,
2323                       struct i965_surface *dst_surface,
2324                       const VARectangle *dst_rect,
2325                       void *filter_param)
2326 {
2327     struct i965_driver_data *i965 = i965_driver_data(ctx);
2328     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2329     struct object_surface *obj_surface;
2330     struct i965_sampler_dndi *sampler_dndi;
2331     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2332     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2333     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2334     int index;
2335     int w, h;
2336     int orig_w, orig_h;
2337     int dn_strength = 15;
2338     int dndi_top_first = 1;
2339     int dn_progressive = 0;
2340
2341     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2342         dndi_top_first = 1;
2343         dn_progressive = 1;
2344     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2345         dndi_top_first = 1;
2346         dn_progressive = 0;
2347     } else {
2348         dndi_top_first = 0;
2349         dn_progressive = 0;
2350     }
2351
2352     if (dn_filter_param) {
2353         float value = dn_filter_param->value;
2354         
2355         if (value > 1.0)
2356             value = 1.0;
2357         
2358         if (value < 0.0)
2359             value = 0.0;
2360
2361         dn_strength = (int)(value * 31.0F);
2362     }
2363
2364     /* surface */
2365     obj_surface = SURFACE(src_surface->id);
2366     orig_w = obj_surface->orig_width;
2367     orig_h = obj_surface->orig_height;
2368     w = obj_surface->width;
2369     h = obj_surface->height;
2370
2371     if (pp_context->stmm.bo == NULL) {
2372         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2373                                            "STMM surface",
2374                                            w * h,
2375                                            4096);
2376         assert(pp_context->stmm.bo);
2377     }
2378
2379     /* source UV surface index 2 */
2380     i965_pp_set_surface_state(ctx, pp_context,
2381                               obj_surface->bo, w * h,
2382                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2383                               2, 0);
2384
2385     /* source YUV surface index 4 */
2386     i965_pp_set_surface2_state(ctx, pp_context,
2387                                obj_surface->bo, 0,
2388                                orig_w, orig_h, w,
2389                                0, h,
2390                                SURFACE_FORMAT_PLANAR_420_8, 1,
2391                                4);
2392
2393     /* source STMM surface index 20 */
2394     i965_pp_set_surface_state(ctx, pp_context,
2395                               pp_context->stmm.bo, 0,
2396                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2397                               20, 1);
2398
2399     /* destination surface */
2400     obj_surface = SURFACE(dst_surface->id);
2401     orig_w = obj_surface->orig_width;
2402     orig_h = obj_surface->orig_height;
2403     w = obj_surface->width;
2404     h = obj_surface->height;
2405
2406     /* destination Y surface index 7 */
2407     i965_pp_set_surface_state(ctx, pp_context,
2408                               obj_surface->bo, 0,
2409                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2410                               7, 1);
2411
2412     /* destination UV surface index 8 */
2413     i965_pp_set_surface_state(ctx, pp_context,
2414                               obj_surface->bo, w * h,
2415                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2416                               8, 1);
2417     /* sampler dn */
2418     dri_bo_map(pp_context->sampler_state_table.bo, True);
2419     assert(pp_context->sampler_state_table.bo->virtual);
2420     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2421     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2422
2423     /* sample dndi index 1 */
2424     index = 0;
2425     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2426     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2427     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2428     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2429
2430     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2431     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2432     sampler_dndi[index].dw1.stmm_c2 = 0;
2433     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2434     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2435
2436     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2437     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2438     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2439     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
2440
2441     sampler_dndi[index].dw3.maximum_stmm = 128;
2442     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2443     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2444     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2445     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2446
2447     sampler_dndi[index].dw4.sdi_delta = 8;
2448     sampler_dndi[index].dw4.sdi_threshold = 128;
2449     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2450     sampler_dndi[index].dw4.stmm_shift_up = 0;
2451     sampler_dndi[index].dw4.stmm_shift_down = 0;
2452     sampler_dndi[index].dw4.minimum_stmm = 0;
2453
2454     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2455     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2456     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2457     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2458
2459     sampler_dndi[index].dw6.dn_enable = 1;
2460     sampler_dndi[index].dw6.di_enable = 0;
2461     sampler_dndi[index].dw6.di_partial = 0;
2462     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2463     sampler_dndi[index].dw6.dndi_stream_id = 1;
2464     sampler_dndi[index].dw6.dndi_first_frame = 1;
2465     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
2466     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2467     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2468     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2469
2470     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2471     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2472     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2473     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2474
2475     dri_bo_unmap(pp_context->sampler_state_table.bo);
2476
2477     /* private function & data */
2478     pp_context->pp_x_steps = pp_dn_x_steps;
2479     pp_context->pp_y_steps = pp_dn_y_steps;
2480     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
2481
2482     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2483     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
2484     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
2485     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
2486
2487     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2488     pp_inline_parameter->grf5.number_blocks = w / 16;
2489     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2490     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2491
2492     pp_dn_context->dest_w = w;
2493     pp_dn_context->dest_h = h;
2494
2495     dst_surface->flags = src_surface->flags;
2496     
2497     return VA_STATUS_SUCCESS;
2498 }
2499
2500 static int
2501 gen7_pp_dndi_x_steps(void *private_context)
2502 {
2503     struct pp_dndi_context *pp_dndi_context = private_context;
2504
2505     return pp_dndi_context->dest_w / 16;
2506 }
2507
2508 static int
2509 gen7_pp_dndi_y_steps(void *private_context)
2510 {
2511     struct pp_dndi_context *pp_dndi_context = private_context;
2512
2513     return pp_dndi_context->dest_h / 4;
2514 }
2515
2516 static int
2517 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2518 {
2519     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2520
2521     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
2522     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
2523
2524     return 0;
2525 }
2526
2527 static VAStatus
2528 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2529                              const struct i965_surface *src_surface,
2530                              const VARectangle *src_rect,
2531                              struct i965_surface *dst_surface,
2532                              const VARectangle *dst_rect,
2533                              void *filter_param)
2534 {
2535     struct i965_driver_data *i965 = i965_driver_data(ctx);
2536     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2537     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2538     struct object_surface *obj_surface;
2539     struct gen7_sampler_dndi *sampler_dndi;
2540     int index;
2541     int w, h;
2542     int orig_w, orig_h;
2543     int dndi_top_first = 1;
2544
2545     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2546         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2547
2548     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2549         dndi_top_first = 1;
2550     else
2551         dndi_top_first = 0;
2552
2553     /* surface */
2554     obj_surface = SURFACE(src_surface->id);
2555     orig_w = obj_surface->orig_width;
2556     orig_h = obj_surface->orig_height;
2557     w = obj_surface->width;
2558     h = obj_surface->height;
2559
2560     if (pp_context->stmm.bo == NULL) {
2561         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2562                                            "STMM surface",
2563                                            w * h,
2564                                            4096);
2565         assert(pp_context->stmm.bo);
2566     }
2567
2568     /* source UV surface index 1 */
2569     gen7_pp_set_surface_state(ctx, pp_context,
2570                               obj_surface->bo, w * h,
2571                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2572                               1, 0);
2573
2574     /* source YUV surface index 3 */
2575     gen7_pp_set_surface2_state(ctx, pp_context,
2576                                obj_surface->bo, 0,
2577                                orig_w, orig_h, w,
2578                                0, h,
2579                                SURFACE_FORMAT_PLANAR_420_8, 1,
2580                                3);
2581
2582     /* source (temporal reference) YUV surface index 4 */
2583     gen7_pp_set_surface2_state(ctx, pp_context,
2584                                obj_surface->bo, 0,
2585                                orig_w, orig_h, w,
2586                                0, h,
2587                                SURFACE_FORMAT_PLANAR_420_8, 1,
2588                                4);
2589
2590     /* STMM / History Statistics input surface, index 5 */
2591     gen7_pp_set_surface_state(ctx, pp_context,
2592                               pp_context->stmm.bo, 0,
2593                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2594                               5, 1);
2595
2596     /* destination surface */
2597     obj_surface = SURFACE(dst_surface->id);
2598     orig_w = obj_surface->orig_width;
2599     orig_h = obj_surface->orig_height;
2600     w = obj_surface->width;
2601     h = obj_surface->height;
2602
2603     /* destination(Previous frame) Y surface index 27 */
2604     gen7_pp_set_surface_state(ctx, pp_context,
2605                               obj_surface->bo, 0,
2606                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2607                               27, 1);
2608
2609     /* destination(Previous frame) UV surface index 28 */
2610     gen7_pp_set_surface_state(ctx, pp_context,
2611                               obj_surface->bo, w * h,
2612                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2613                               28, 1);
2614
2615     /* destination(Current frame) Y surface index 30 */
2616     gen7_pp_set_surface_state(ctx, pp_context,
2617                               obj_surface->bo, 0,
2618                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2619                               30, 1);
2620
2621     /* destination(Current frame) UV surface index 31 */
2622     gen7_pp_set_surface_state(ctx, pp_context,
2623                               obj_surface->bo, w * h,
2624                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2625                               31, 1);
2626
2627     /* STMM output surface, index 33 */
2628     gen7_pp_set_surface_state(ctx, pp_context,
2629                               pp_context->stmm.bo, 0,
2630                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2631                               33, 1);
2632
2633
2634     /* sampler dndi */
2635     dri_bo_map(pp_context->sampler_state_table.bo, True);
2636     assert(pp_context->sampler_state_table.bo->virtual);
2637     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2638     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2639
2640     /* sample dndi index 0 */
2641     index = 0;
2642     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2643     sampler_dndi[index].dw0.dnmh_delt = 8;
2644     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
2645     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
2646     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2647     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2648
2649     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2650     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2651     sampler_dndi[index].dw1.stmm_c2 = 0;
2652     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2653     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2654
2655     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2656     sampler_dndi[index].dw2.bne_edge_th = 1;
2657     sampler_dndi[index].dw2.smooth_mv_th = 0;
2658     sampler_dndi[index].dw2.sad_tight_th = 5;
2659     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
2660     sampler_dndi[index].dw2.good_neighbor_th = 4;
2661
2662     sampler_dndi[index].dw3.maximum_stmm = 128;
2663     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2664     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2665     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2666     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2667
2668     sampler_dndi[index].dw4.sdi_delta = 8;
2669     sampler_dndi[index].dw4.sdi_threshold = 128;
2670     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2671     sampler_dndi[index].dw4.stmm_shift_up = 0;
2672     sampler_dndi[index].dw4.stmm_shift_down = 0;
2673     sampler_dndi[index].dw4.minimum_stmm = 0;
2674
2675     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2676     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2677     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2678     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2679
2680     sampler_dndi[index].dw6.dn_enable = 0;
2681     sampler_dndi[index].dw6.di_enable = 1;
2682     sampler_dndi[index].dw6.di_partial = 0;
2683     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2684     sampler_dndi[index].dw6.dndi_stream_id = 1;
2685     sampler_dndi[index].dw6.dndi_first_frame = 1;
2686     sampler_dndi[index].dw6.progressive_dn = 0;
2687     sampler_dndi[index].dw6.mcdi_enable = 0;
2688     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2689     sampler_dndi[index].dw6.cat_th1 = 0;
2690     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2691     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2692
2693     sampler_dndi[index].dw7.sad_tha = 5;
2694     sampler_dndi[index].dw7.sad_thb = 10;
2695     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2696     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
2697     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2698     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2699     sampler_dndi[index].dw7.neighborpixel_th = 10;
2700     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2701
2702     dri_bo_unmap(pp_context->sampler_state_table.bo);
2703
2704     /* private function & data */
2705     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
2706     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
2707     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
2708
2709     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
2710     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
2711     pp_static_parameter->grf1.di_top_field_first = 0;
2712     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2713
2714     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2715     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2716     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2717
2718     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
2719     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
2720
2721     pp_dndi_context->dest_w = w;
2722     pp_dndi_context->dest_h = h;
2723
2724     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2725
2726     return VA_STATUS_SUCCESS;
2727 }
2728
/*
 * Number of horizontal steps for the Gen7 denoise pass.
 *
 * The private context is not consulted: the pass always reports a single
 * horizontal step.
 */
static int
gen7_pp_dn_x_steps(void *private_context)
{
    (void)private_context;      /* intentionally unused */

    return 1;
}
2734
2735 static int
2736 gen7_pp_dn_y_steps(void *private_context)
2737 {
2738     struct pp_dn_context *pp_dn_context = private_context;
2739
2740     return pp_dn_context->dest_h / 4;
2741 }
2742
2743 static int
2744 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2745 {
2746     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2747
2748     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2749     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2750
2751     return 0;
2752 }
2753
2754 static VAStatus
2755 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2756                            const struct i965_surface *src_surface,
2757                            const VARectangle *src_rect,
2758                            struct i965_surface *dst_surface,
2759                            const VARectangle *dst_rect,
2760                            void *filter_param)
2761 {
2762     struct i965_driver_data *i965 = i965_driver_data(ctx);
2763     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2764     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2765     struct object_surface *obj_surface;
2766     struct gen7_sampler_dndi *sampler_dn;
2767     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2768     int index;
2769     int w, h;
2770     int orig_w, orig_h;
2771     int dn_strength = 15;
2772     int dndi_top_first = 1;
2773     int dn_progressive = 0;
2774
2775     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2776         dndi_top_first = 1;
2777         dn_progressive = 1;
2778     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2779         dndi_top_first = 1;
2780         dn_progressive = 0;
2781     } else {
2782         dndi_top_first = 0;
2783         dn_progressive = 0;
2784     }
2785
2786     if (dn_filter_param) {
2787         float value = dn_filter_param->value;
2788         
2789         if (value > 1.0)
2790             value = 1.0;
2791         
2792         if (value < 0.0)
2793             value = 0.0;
2794
2795         dn_strength = (int)(value * 31.0F);
2796     }
2797
2798     /* surface */
2799     obj_surface = SURFACE(src_surface->id);
2800     orig_w = obj_surface->orig_width;
2801     orig_h = obj_surface->orig_height;
2802     w = obj_surface->width;
2803     h = obj_surface->height;
2804
2805     if (pp_context->stmm.bo == NULL) {
2806         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2807                                            "STMM surface",
2808                                            w * h,
2809                                            4096);
2810         assert(pp_context->stmm.bo);
2811     }
2812
2813     /* source UV surface index 1 */
2814     gen7_pp_set_surface_state(ctx, pp_context,
2815                               obj_surface->bo, w * h,
2816                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2817                               1, 0);
2818
2819     /* source YUV surface index 3 */
2820     gen7_pp_set_surface2_state(ctx, pp_context,
2821                                obj_surface->bo, 0,
2822                                orig_w, orig_h, w,
2823                                0, h,
2824                                SURFACE_FORMAT_PLANAR_420_8, 1,
2825                                3);
2826
2827     /* source STMM surface index 5 */
2828     gen7_pp_set_surface_state(ctx, pp_context,
2829                               pp_context->stmm.bo, 0,
2830                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2831                               5, 1);
2832
2833     /* destination surface */
2834     obj_surface = SURFACE(dst_surface->id);
2835     orig_w = obj_surface->orig_width;
2836     orig_h = obj_surface->orig_height;
2837     w = obj_surface->width;
2838     h = obj_surface->height;
2839
2840     /* destination Y surface index 7 */
2841     gen7_pp_set_surface_state(ctx, pp_context,
2842                               obj_surface->bo, 0,
2843                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2844                               7, 1);
2845
2846     /* destination UV surface index 8 */
2847     gen7_pp_set_surface_state(ctx, pp_context,
2848                               obj_surface->bo, w * h,
2849                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2850                               8, 1);
2851     /* sampler dn */
2852     dri_bo_map(pp_context->sampler_state_table.bo, True);
2853     assert(pp_context->sampler_state_table.bo->virtual);
2854     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
2855     sampler_dn = pp_context->sampler_state_table.bo->virtual;
2856
2857     /* sample dn index 1 */
2858     index = 0;
2859     sampler_dn[index].dw0.denoise_asd_threshold = 0;
2860     sampler_dn[index].dw0.dnmh_delt = 8;
2861     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
2862     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
2863     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
2864     sampler_dn[index].dw0.denoise_stad_threshold = 0;
2865
2866     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2867     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
2868     sampler_dn[index].dw1.stmm_c2 = 0;
2869     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
2870     sampler_dn[index].dw1.temporal_difference_threshold = 16;
2871
2872     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2873     sampler_dn[index].dw2.bne_edge_th = 1;
2874     sampler_dn[index].dw2.smooth_mv_th = 0;
2875     sampler_dn[index].dw2.sad_tight_th = 5;
2876     sampler_dn[index].dw2.cat_slope_minus1 = 9;
2877     sampler_dn[index].dw2.good_neighbor_th = 4;
2878
2879     sampler_dn[index].dw3.maximum_stmm = 128;
2880     sampler_dn[index].dw3.multipler_for_vecm = 2;
2881     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2882     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2883     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
2884
2885     sampler_dn[index].dw4.sdi_delta = 8;
2886     sampler_dn[index].dw4.sdi_threshold = 128;
2887     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2888     sampler_dn[index].dw4.stmm_shift_up = 0;
2889     sampler_dn[index].dw4.stmm_shift_down = 0;
2890     sampler_dn[index].dw4.minimum_stmm = 0;
2891
2892     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
2893     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
2894     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2895     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2896
2897     sampler_dn[index].dw6.dn_enable = 1;
2898     sampler_dn[index].dw6.di_enable = 0;
2899     sampler_dn[index].dw6.di_partial = 0;
2900     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
2901     sampler_dn[index].dw6.dndi_stream_id = 1;
2902     sampler_dn[index].dw6.dndi_first_frame = 1;
2903     sampler_dn[index].dw6.progressive_dn = dn_progressive;
2904     sampler_dn[index].dw6.mcdi_enable = 0;
2905     sampler_dn[index].dw6.fmd_tear_threshold = 32;
2906     sampler_dn[index].dw6.cat_th1 = 0;
2907     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
2908     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
2909
2910     sampler_dn[index].dw7.sad_tha = 5;
2911     sampler_dn[index].dw7.sad_thb = 10;
2912     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2913     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
2914     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2915     sampler_dn[index].dw7.vdi_walker_enable = 0;
2916     sampler_dn[index].dw7.neighborpixel_th = 10;
2917     sampler_dn[index].dw7.column_width_minus1 = w / 16;
2918
2919     dri_bo_unmap(pp_context->sampler_state_table.bo);
2920
2921     /* private function & data */
2922     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
2923     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
2924     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
2925
2926     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
2927     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
2928     pp_static_parameter->grf1.di_top_field_first = 0;
2929     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2930
2931     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2932     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2933     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2934
2935     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
2936     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
2937
2938     pp_dn_context->dest_w = w;
2939     pp_dn_context->dest_h = h;
2940
2941     dst_surface->flags = src_surface->flags;
2942
2943     return VA_STATUS_SUCCESS;
2944 }
2945
2946 static VAStatus
2947 ironlake_pp_initialize(
2948     VADriverContextP   ctx,
2949     struct i965_post_processing_context *pp_context,
2950     const struct i965_surface *src_surface,
2951     const VARectangle *src_rect,
2952     struct i965_surface *dst_surface,
2953     const VARectangle *dst_rect,
2954     int                pp_index,
2955     void *filter_param
2956 )
2957 {
2958     VAStatus va_status;
2959     struct i965_driver_data *i965 = i965_driver_data(ctx);
2960     struct pp_module *pp_module;
2961     dri_bo *bo;
2962     int static_param_size, inline_param_size;
2963
2964     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
2965     bo = dri_bo_alloc(i965->intel.bufmgr,
2966                       "surface state & binding table",
2967                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
2968                       4096);
2969     assert(bo);
2970     pp_context->surface_state_binding_table.bo = bo;
2971
2972     dri_bo_unreference(pp_context->curbe.bo);
2973     bo = dri_bo_alloc(i965->intel.bufmgr,
2974                       "constant buffer",
2975                       4096, 
2976                       4096);
2977     assert(bo);
2978     pp_context->curbe.bo = bo;
2979
2980     dri_bo_unreference(pp_context->idrt.bo);
2981     bo = dri_bo_alloc(i965->intel.bufmgr, 
2982                       "interface discriptor", 
2983                       sizeof(struct i965_interface_descriptor), 
2984                       4096);
2985     assert(bo);
2986     pp_context->idrt.bo = bo;
2987     pp_context->idrt.num_interface_descriptors = 0;
2988
2989     dri_bo_unreference(pp_context->sampler_state_table.bo);
2990     bo = dri_bo_alloc(i965->intel.bufmgr, 
2991                       "sampler state table", 
2992                       4096,
2993                       4096);
2994     assert(bo);
2995     dri_bo_map(bo, True);
2996     memset(bo->virtual, 0, bo->size);
2997     dri_bo_unmap(bo);
2998     pp_context->sampler_state_table.bo = bo;
2999
3000     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3001     bo = dri_bo_alloc(i965->intel.bufmgr, 
3002                       "sampler 8x8 state ",
3003                       4096,
3004                       4096);
3005     assert(bo);
3006     pp_context->sampler_state_table.bo_8x8 = bo;
3007
3008     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3009     bo = dri_bo_alloc(i965->intel.bufmgr, 
3010                       "sampler 8x8 state ",
3011                       4096,
3012                       4096);
3013     assert(bo);
3014     pp_context->sampler_state_table.bo_8x8_uv = bo;
3015
3016     dri_bo_unreference(pp_context->vfe_state.bo);
3017     bo = dri_bo_alloc(i965->intel.bufmgr, 
3018                       "vfe state", 
3019                       sizeof(struct i965_vfe_state), 
3020                       4096);
3021     assert(bo);
3022     pp_context->vfe_state.bo = bo;
3023
3024     if (IS_GEN7(i965->intel.device_id)) {
3025         static_param_size = sizeof(struct gen7_pp_static_parameter);
3026         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
3027     } else {
3028         static_param_size = sizeof(struct pp_static_parameter);
3029         inline_param_size = sizeof(struct pp_inline_parameter);
3030     }
3031
3032     memset(pp_context->pp_static_parameter, 0, static_param_size);
3033     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3034     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3035     pp_context->current_pp = pp_index;
3036     pp_module = &pp_context->pp_modules[pp_index];
3037     
3038     if (pp_module->initialize)
3039         va_status = pp_module->initialize(ctx, pp_context,
3040                                           src_surface,
3041                                           src_rect,
3042                                           dst_surface,
3043                                           dst_rect,
3044                                           filter_param);
3045     else
3046         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3047
3048     return va_status;
3049 }
3050
3051 static VAStatus
3052 ironlake_post_processing(
3053     VADriverContextP   ctx,
3054     struct i965_post_processing_context *pp_context,
3055     const struct i965_surface *src_surface,
3056     const VARectangle *src_rect,
3057     struct i965_surface *dst_surface,
3058     const VARectangle *dst_rect,
3059     int                pp_index,
3060     void *filter_param
3061 )
3062 {
3063     VAStatus va_status;
3064
3065     va_status = ironlake_pp_initialize(ctx, pp_context,
3066                                        src_surface,
3067                                        src_rect,
3068                                        dst_surface,
3069                                        dst_rect,
3070                                        pp_index,
3071                                        filter_param);
3072
3073     if (va_status == VA_STATUS_SUCCESS) {
3074         ironlake_pp_states_setup(ctx, pp_context);
3075         ironlake_pp_pipeline_setup(ctx, pp_context);
3076     }
3077
3078     return va_status;
3079 }
3080
3081 static VAStatus
3082 gen6_pp_initialize(
3083     VADriverContextP   ctx,
3084     struct i965_post_processing_context *pp_context,
3085     const struct i965_surface *src_surface,
3086     const VARectangle *src_rect,
3087     struct i965_surface *dst_surface,
3088     const VARectangle *dst_rect,
3089     int                pp_index,
3090     void *filter_param
3091 )
3092 {
3093     VAStatus va_status;
3094     struct i965_driver_data *i965 = i965_driver_data(ctx);
3095     struct pp_module *pp_module;
3096     dri_bo *bo;
3097     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3098     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3099
3100     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3101     bo = dri_bo_alloc(i965->intel.bufmgr,
3102                       "surface state & binding table",
3103                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3104                       4096);
3105     assert(bo);
3106     pp_context->surface_state_binding_table.bo = bo;
3107
3108     dri_bo_unreference(pp_context->curbe.bo);
3109     bo = dri_bo_alloc(i965->intel.bufmgr,
3110                       "constant buffer",
3111                       4096, 
3112                       4096);
3113     assert(bo);
3114     pp_context->curbe.bo = bo;
3115
3116     dri_bo_unreference(pp_context->idrt.bo);
3117     bo = dri_bo_alloc(i965->intel.bufmgr, 
3118                       "interface discriptor", 
3119                       sizeof(struct gen6_interface_descriptor_data), 
3120                       4096);
3121     assert(bo);
3122     pp_context->idrt.bo = bo;
3123     pp_context->idrt.num_interface_descriptors = 0;
3124
3125     dri_bo_unreference(pp_context->sampler_state_table.bo);
3126     bo = dri_bo_alloc(i965->intel.bufmgr, 
3127                       "sampler state table", 
3128                       4096,
3129                       4096);
3130     assert(bo);
3131     dri_bo_map(bo, True);
3132     memset(bo->virtual, 0, bo->size);
3133     dri_bo_unmap(bo);
3134     pp_context->sampler_state_table.bo = bo;
3135
3136     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3137     bo = dri_bo_alloc(i965->intel.bufmgr, 
3138                       "sampler 8x8 state ",
3139                       4096,
3140                       4096);
3141     assert(bo);
3142     pp_context->sampler_state_table.bo_8x8 = bo;
3143
3144     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3145     bo = dri_bo_alloc(i965->intel.bufmgr, 
3146                       "sampler 8x8 state ",
3147                       4096,
3148                       4096);
3149     assert(bo);
3150     pp_context->sampler_state_table.bo_8x8_uv = bo;
3151
3152     dri_bo_unreference(pp_context->vfe_state.bo);
3153     bo = dri_bo_alloc(i965->intel.bufmgr, 
3154                       "vfe state", 
3155                       sizeof(struct i965_vfe_state), 
3156                       4096);
3157     assert(bo);
3158     pp_context->vfe_state.bo = bo;
3159     
3160     memset(pp_static_parameter, 0, sizeof(*pp_static_parameter));
3161     memset(pp_inline_parameter, 0, sizeof(*pp_inline_parameter));
3162     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3163     pp_context->current_pp = pp_index;
3164     pp_module = &pp_context->pp_modules[pp_index];
3165     
3166     if (pp_module->initialize)
3167         va_status = pp_module->initialize(ctx, pp_context,
3168                                           src_surface,
3169                                           src_rect,
3170                                           dst_surface,
3171                                           dst_rect,
3172                                           filter_param);
3173     else
3174         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3175
3176     return va_status;
3177 }
3178
3179 static void
3180 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
3181                                    struct i965_post_processing_context *pp_context)
3182 {
3183     struct i965_driver_data *i965 = i965_driver_data(ctx);
3184     struct gen6_interface_descriptor_data *desc;
3185     dri_bo *bo;
3186     int pp_index = pp_context->current_pp;
3187
3188     bo = pp_context->idrt.bo;
3189     dri_bo_map(bo, True);
3190     assert(bo->virtual);
3191     desc = bo->virtual;
3192     memset(desc, 0, sizeof(*desc));
3193     desc->desc0.kernel_start_pointer = 
3194         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
3195     desc->desc1.single_program_flow = 1;
3196     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
3197     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
3198     desc->desc2.sampler_state_pointer = 
3199         pp_context->sampler_state_table.bo->offset >> 5;
3200     desc->desc3.binding_table_entry_count = 0;
3201     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
3202     desc->desc4.constant_urb_entry_read_offset = 0;
3203
3204     if (IS_GEN7(i965->intel.device_id))
3205         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
3206     else
3207         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
3208
3209     dri_bo_emit_reloc(bo,
3210                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3211                       0,
3212                       offsetof(struct gen6_interface_descriptor_data, desc0),
3213                       pp_context->pp_modules[pp_index].kernel.bo);
3214
3215     dri_bo_emit_reloc(bo,
3216                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3217                       desc->desc2.sampler_count << 2,
3218                       offsetof(struct gen6_interface_descriptor_data, desc2),
3219                       pp_context->sampler_state_table.bo);
3220
3221     dri_bo_unmap(bo);
3222     pp_context->idrt.num_interface_descriptors++;
3223 }
3224
3225 static void
3226 gen6_pp_upload_constants(VADriverContextP ctx,
3227                          struct i965_post_processing_context *pp_context)
3228 {
3229     struct i965_driver_data *i965 = i965_driver_data(ctx);
3230     unsigned char *constant_buffer;
3231     int param_size;
3232
3233     assert(sizeof(struct pp_static_parameter) == 128);
3234     assert(sizeof(struct gen7_pp_static_parameter) == 192);
3235
3236     if (IS_GEN7(i965->intel.device_id))
3237         param_size = sizeof(struct gen7_pp_static_parameter);
3238     else
3239         param_size = sizeof(struct pp_static_parameter);
3240
3241     dri_bo_map(pp_context->curbe.bo, 1);
3242     assert(pp_context->curbe.bo->virtual);
3243     constant_buffer = pp_context->curbe.bo->virtual;
3244     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
3245     dri_bo_unmap(pp_context->curbe.bo);
3246 }
3247
3248 static void
3249 gen6_pp_states_setup(VADriverContextP ctx,
3250                      struct i965_post_processing_context *pp_context)
3251 {
3252     gen6_pp_interface_descriptor_table(ctx, pp_context);
3253     gen6_pp_upload_constants(ctx, pp_context);
3254 }
3255
3256 static void
3257 gen6_pp_pipeline_select(VADriverContextP ctx,
3258                         struct i965_post_processing_context *pp_context)
3259 {
3260     struct intel_batchbuffer *batch = pp_context->batch;
3261
3262     BEGIN_BATCH(batch, 1);
3263     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
3264     ADVANCE_BATCH(batch);
3265 }
3266
3267 static void
3268 gen6_pp_state_base_address(VADriverContextP ctx,
3269                            struct i965_post_processing_context *pp_context)
3270 {
3271     struct intel_batchbuffer *batch = pp_context->batch;
3272
3273     BEGIN_BATCH(batch, 10);
3274     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
3275     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3276     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
3277     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3278     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3279     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3280     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3281     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3282     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3283     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3284     ADVANCE_BATCH(batch);
3285 }
3286
3287 static void
3288 gen6_pp_vfe_state(VADriverContextP ctx,
3289                   struct i965_post_processing_context *pp_context)
3290 {
3291     struct intel_batchbuffer *batch = pp_context->batch;
3292
3293     BEGIN_BATCH(batch, 8);
3294     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
3295     OUT_BATCH(batch, 0);
3296     OUT_BATCH(batch,
3297               (pp_context->urb.num_vfe_entries - 1) << 16 |
3298               pp_context->urb.num_vfe_entries << 8);
3299     OUT_BATCH(batch, 0);
3300     OUT_BATCH(batch,
3301               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
3302               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
3303     OUT_BATCH(batch, 0);
3304     OUT_BATCH(batch, 0);
3305     OUT_BATCH(batch, 0);
3306     ADVANCE_BATCH(batch);
3307 }
3308
3309 static void
3310 gen6_pp_curbe_load(VADriverContextP ctx,
3311                    struct i965_post_processing_context *pp_context)
3312 {
3313     struct intel_batchbuffer *batch = pp_context->batch;
3314
3315     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
3316
3317     BEGIN_BATCH(batch, 4);
3318     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
3319     OUT_BATCH(batch, 0);
3320     OUT_BATCH(batch,
3321               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
3322     OUT_RELOC(batch, 
3323               pp_context->curbe.bo,
3324               I915_GEM_DOMAIN_INSTRUCTION, 0,
3325               0);
3326     ADVANCE_BATCH(batch);
3327 }
3328
3329 static void
3330 gen6_interface_descriptor_load(VADriverContextP ctx,
3331                                struct i965_post_processing_context *pp_context)
3332 {
3333     struct intel_batchbuffer *batch = pp_context->batch;
3334
3335     BEGIN_BATCH(batch, 4);
3336     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
3337     OUT_BATCH(batch, 0);
3338     OUT_BATCH(batch,
3339               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
3340     OUT_RELOC(batch, 
3341               pp_context->idrt.bo,
3342               I915_GEM_DOMAIN_INSTRUCTION, 0,
3343               0);
3344     ADVANCE_BATCH(batch);
3345 }
3346
3347 static void
3348 gen6_pp_object_walker(VADriverContextP ctx,
3349                       struct i965_post_processing_context *pp_context)
3350 {
3351     struct i965_driver_data *i965 = i965_driver_data(ctx);
3352     struct intel_batchbuffer *batch = pp_context->batch;
3353     int x, x_steps, y, y_steps;
3354     int param_size, command_length_in_dws;
3355     dri_bo *command_buffer;
3356     unsigned int *command_ptr;
3357
3358     if (IS_GEN7(i965->intel.device_id))
3359         param_size = sizeof(struct gen7_pp_inline_parameter);
3360     else
3361         param_size = sizeof(struct pp_inline_parameter);
3362
3363     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
3364     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
3365     command_length_in_dws = 6 + (param_size >> 2);
3366     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
3367                                   "command objects buffer",
3368                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
3369                                   4096);
3370
3371     dri_bo_map(command_buffer, 1);
3372     command_ptr = command_buffer->virtual;
3373
3374     for (y = 0; y < y_steps; y++) {
3375         for (x = 0; x < x_steps; x++) {
3376             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
3377                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
3378                 *command_ptr++ = 0;
3379                 *command_ptr++ = 0;
3380                 *command_ptr++ = 0;
3381                 *command_ptr++ = 0;
3382                 *command_ptr++ = 0;
3383                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
3384                 command_ptr += (param_size >> 2);
3385             }
3386         }
3387     }
3388
3389     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
3390         *command_ptr++ = 0;
3391
3392     *command_ptr = MI_BATCH_BUFFER_END;
3393
3394     dri_bo_unmap(command_buffer);
3395
3396     BEGIN_BATCH(batch, 2);
3397     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
3398     OUT_RELOC(batch, command_buffer, 
3399               I915_GEM_DOMAIN_COMMAND, 0, 
3400               0);
3401     ADVANCE_BATCH(batch);
3402     
3403     dri_bo_unreference(command_buffer);
3404
3405     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
3406      * will cause control to pass back to ring buffer 
3407      */
3408     intel_batchbuffer_end_atomic(batch);
3409     intel_batchbuffer_flush(batch);
3410     intel_batchbuffer_start_atomic(batch, 0x1000);
3411 }
3412
3413 static void
3414 gen6_pp_pipeline_setup(VADriverContextP ctx,
3415                        struct i965_post_processing_context *pp_context)
3416 {
3417     struct intel_batchbuffer *batch = pp_context->batch;
3418
3419     intel_batchbuffer_start_atomic(batch, 0x1000);
3420     intel_batchbuffer_emit_mi_flush(batch);
3421     gen6_pp_pipeline_select(ctx, pp_context);
3422     gen6_pp_state_base_address(ctx, pp_context);
3423     gen6_pp_vfe_state(ctx, pp_context);
3424     gen6_pp_curbe_load(ctx, pp_context);
3425     gen6_interface_descriptor_load(ctx, pp_context);
3426     gen6_pp_object_walker(ctx, pp_context);
3427     intel_batchbuffer_end_atomic(batch);
3428 }
3429
3430 static VAStatus
3431 gen6_post_processing(
3432     VADriverContextP   ctx,
3433     struct i965_post_processing_context *pp_context,
3434     const struct i965_surface *src_surface,
3435     const VARectangle *src_rect,
3436     struct i965_surface *dst_surface,
3437     const VARectangle *dst_rect,
3438     int                pp_index,
3439     void * filter_param
3440 )
3441 {
3442     VAStatus va_status;
3443     
3444     va_status = gen6_pp_initialize(ctx, pp_context,
3445                                    src_surface,
3446                                    src_rect,
3447                                    dst_surface,
3448                                    dst_rect,
3449                                    pp_index,
3450                                    filter_param);
3451
3452     if (va_status == VA_STATUS_SUCCESS) {
3453         gen6_pp_states_setup(ctx, pp_context);
3454         gen6_pp_pipeline_setup(ctx, pp_context);
3455     }
3456
3457     return va_status;
3458 }
3459
3460 static VAStatus
3461 i965_post_processing_internal(
3462     VADriverContextP   ctx,
3463     struct i965_post_processing_context *pp_context,
3464     const struct i965_surface *src_surface,
3465     const VARectangle *src_rect,
3466     struct i965_surface *dst_surface,
3467     const VARectangle *dst_rect,
3468     int                pp_index,
3469     void *filter_param
3470 )
3471 {
3472     struct i965_driver_data *i965 = i965_driver_data(ctx);
3473     VAStatus va_status;
3474
3475     if (IS_GEN6(i965->intel.device_id) ||
3476         IS_GEN7(i965->intel.device_id))
3477         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3478     else
3479         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3480     
3481     return va_status;
3482 }
3483
3484 VAStatus 
3485 i965_DestroySurfaces(VADriverContextP ctx,
3486                      VASurfaceID *surface_list,
3487                      int num_surfaces);
3488 VAStatus 
3489 i965_CreateSurfaces(VADriverContextP ctx,
3490                     int width,
3491                     int height,
3492                     int format,
3493                     int num_surfaces,
3494                     VASurfaceID *surfaces);
3495
3496 static void 
3497 i965_vpp_clear_surface(VADriverContextP ctx,
3498                        struct i965_post_processing_context *pp_context,
3499                        VASurfaceID surface,
3500                        unsigned int color)
3501 {
3502     struct i965_driver_data *i965 = i965_driver_data(ctx);
3503     struct intel_batchbuffer *batch = pp_context->batch;
3504     struct object_surface *obj_surface = SURFACE(surface);
3505     unsigned int blt_cmd, br13;
3506     unsigned int tiling = 0, swizzle = 0;
3507     int pitch;
3508
3509     /* Currently only support NV12 surface */
3510     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
3511         return;
3512
3513     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
3514     blt_cmd = XY_COLOR_BLT_CMD;
3515     pitch = obj_surface->width;
3516
3517     if (tiling != I915_TILING_NONE) {
3518         blt_cmd |= XY_COLOR_BLT_DST_TILED;
3519         pitch >>= 2;
3520     }
3521
3522     br13 = 0xf0 << 16;
3523     br13 |= BR13_8;
3524     br13 |= pitch;
3525
3526     if (IS_GEN6(i965->intel.device_id) ||
3527         IS_GEN7(i965->intel.device_id)) {
3528         intel_batchbuffer_start_atomic_blt(batch, 48);
3529         BEGIN_BLT_BATCH(batch, 12);
3530     } else {
3531         intel_batchbuffer_start_atomic(batch, 48);
3532         BEGIN_BATCH(batch, 12);
3533     }
3534
3535     OUT_BATCH(batch, blt_cmd);
3536     OUT_BATCH(batch, br13);
3537     OUT_BATCH(batch,
3538               0 << 16 |
3539               0);
3540     OUT_BATCH(batch,
3541               obj_surface->height << 16 |
3542               obj_surface->width);
3543     OUT_RELOC(batch, obj_surface->bo, 
3544               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3545               0);
3546     OUT_BATCH(batch, 0x10);
3547
3548     OUT_BATCH(batch, blt_cmd);
3549     OUT_BATCH(batch, br13);
3550     OUT_BATCH(batch,
3551               0 << 16 |
3552               0);
3553     OUT_BATCH(batch,
3554               obj_surface->height / 2 << 16 |
3555               obj_surface->width);
3556     OUT_RELOC(batch, obj_surface->bo, 
3557               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3558               obj_surface->width * obj_surface->y_cb_offset);
3559     OUT_BATCH(batch, 0x80);
3560
3561     ADVANCE_BATCH(batch);
3562     intel_batchbuffer_end_atomic(batch);
3563 }
3564
3565 VASurfaceID
3566 i965_post_processing(
3567     VADriverContextP   ctx,
3568     VASurfaceID        surface,
3569     const VARectangle *src_rect,
3570     const VARectangle *dst_rect,
3571     unsigned int       flags,
3572     int               *has_done_scaling  
3573 )
3574 {
3575     struct i965_driver_data *i965 = i965_driver_data(ctx);
3576     VASurfaceID in_surface_id = surface;
3577     VASurfaceID out_surface_id = VA_INVALID_ID;
3578     
3579     *has_done_scaling = 0;
3580
3581     if (HAS_PP(i965)) {
3582         struct object_surface *obj_surface;
3583         VAStatus status;
3584         struct i965_surface src_surface;
3585         struct i965_surface dst_surface;
3586
3587         obj_surface = SURFACE(in_surface_id);
3588
3589         /* Currently only support post processing for NV12 surface */
3590         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
3591             return out_surface_id;
3592
3593         _i965LockMutex(&i965->pp_mutex);
3594
3595         if (flags & I965_PP_FLAG_MCDI) {
3596             status = i965_CreateSurfaces(ctx,
3597                                          obj_surface->orig_width,
3598                                          obj_surface->orig_height,
3599                                          VA_RT_FORMAT_YUV420,
3600                                          1,
3601                                          &out_surface_id);
3602             assert(status == VA_STATUS_SUCCESS);
3603             obj_surface = SURFACE(out_surface_id);
3604             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
3605             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
3606             src_surface.id = in_surface_id;
3607             src_surface.type = I965_SURFACE_TYPE_SURFACE;
3608             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
3609                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
3610             dst_surface.id = out_surface_id;
3611             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3612             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3613
3614             i965_post_processing_internal(ctx, i965->pp_context,
3615                                           &src_surface,
3616                                           src_rect,
3617                                           &dst_surface,
3618                                           dst_rect,
3619                                           PP_NV12_DNDI,
3620                                           NULL);
3621         }
3622
3623         if (flags & I965_PP_FLAG_AVS) {
3624             struct i965_render_state *render_state = &i965->render_state;
3625             struct intel_region *dest_region = render_state->draw_region;
3626
3627             if (out_surface_id != VA_INVALID_ID)
3628                 in_surface_id = out_surface_id;
3629
3630             status = i965_CreateSurfaces(ctx,
3631                                          dest_region->width,
3632                                          dest_region->height,
3633                                          VA_RT_FORMAT_YUV420,
3634                                          1,
3635                                          &out_surface_id);
3636             assert(status == VA_STATUS_SUCCESS);
3637             obj_surface = SURFACE(out_surface_id);
3638             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
3639             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
3640             src_surface.id = in_surface_id;
3641             src_surface.type = I965_SURFACE_TYPE_SURFACE;
3642             src_surface.flags = I965_SURFACE_FLAG_FRAME;
3643             dst_surface.id = out_surface_id;
3644             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3645             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3646
3647             i965_post_processing_internal(ctx, i965->pp_context,
3648                                           &src_surface,
3649                                           src_rect,
3650                                           &dst_surface,
3651                                           dst_rect,
3652                                           PP_NV12_AVS,
3653                                           NULL);
3654
3655             if (in_surface_id != surface)
3656                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
3657                 
3658             *has_done_scaling = 1;
3659         }
3660
3661         _i965UnlockMutex(&i965->pp_mutex);
3662     }
3663
3664     return out_surface_id;
3665 }       
3666
3667 static VAStatus
3668 i965_image_pl3_processing(VADriverContextP ctx,
3669                           const struct i965_surface *src_surface,
3670                           const VARectangle *src_rect,
3671                           struct i965_surface *dst_surface,
3672                           const VARectangle *dst_rect)
3673 {
3674     struct i965_driver_data *i965 = i965_driver_data(ctx);
3675     struct i965_post_processing_context *pp_context = i965->pp_context;
3676     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
3677
3678     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
3679         i965_post_processing_internal(ctx, i965->pp_context,
3680                                       src_surface,
3681                                       src_rect,
3682                                       dst_surface,
3683                                       dst_rect,
3684                                       PP_PL3_LOAD_SAVE_N12,
3685                                       NULL);
3686     } else {
3687         i965_post_processing_internal(ctx, i965->pp_context,
3688                                       src_surface,
3689                                       src_rect,
3690                                       dst_surface,
3691                                       dst_rect,
3692                                       PP_PL3_LOAD_SAVE_PL3,
3693                                       NULL);
3694     }
3695
3696     intel_batchbuffer_flush(pp_context->batch);
3697
3698     return VA_STATUS_SUCCESS;
3699 }
3700
3701 static VAStatus
3702 i965_image_pl2_processing(VADriverContextP ctx,
3703                           const struct i965_surface *src_surface,
3704                           const VARectangle *src_rect,
3705                           struct i965_surface *dst_surface,
3706                           const VARectangle *dst_rect)
3707 {
3708     struct i965_driver_data *i965 = i965_driver_data(ctx);
3709     struct i965_post_processing_context *pp_context = i965->pp_context;
3710     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
3711
3712     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
3713         i965_post_processing_internal(ctx, i965->pp_context,
3714                                       src_surface,
3715                                       src_rect,
3716                                       dst_surface,
3717                                       dst_rect,
3718                                       PP_NV12_LOAD_SAVE_N12,
3719                                       NULL);
3720     } else {
3721         i965_post_processing_internal(ctx, i965->pp_context,
3722                                       src_surface,
3723                                       src_rect,
3724                                       dst_surface,
3725                                       dst_rect,
3726                                       PP_NV12_LOAD_SAVE_PL3,
3727                                       NULL);
3728     }
3729
3730     intel_batchbuffer_flush(pp_context->batch);
3731
3732     return VA_STATUS_SUCCESS;
3733 }
3734
3735 VAStatus
3736 i965_image_processing(VADriverContextP ctx,
3737                       const struct i965_surface *src_surface,
3738                       const VARectangle *src_rect,
3739                       struct i965_surface *dst_surface,
3740                       const VARectangle *dst_rect)
3741 {
3742     struct i965_driver_data *i965 = i965_driver_data(ctx);
3743     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
3744
3745     if (HAS_PP(i965)) {
3746         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
3747
3748         _i965LockMutex(&i965->pp_mutex);
3749
3750         switch (fourcc) {
3751         case VA_FOURCC('Y', 'V', '1', '2'):
3752         case VA_FOURCC('I', '4', '2', '0'):
3753         case VA_FOURCC('I', 'M', 'C', '1'):
3754         case VA_FOURCC('I', 'M', 'C', '3'):
3755             status = i965_image_pl3_processing(ctx,
3756                                                src_surface,
3757                                                src_rect,
3758                                                dst_surface,
3759                                                dst_rect);
3760             break;
3761
3762         case  VA_FOURCC('N', 'V', '1', '2'):
3763             status = i965_image_pl2_processing(ctx,
3764                                                src_surface,
3765                                                src_rect,
3766                                                dst_surface,
3767                                                dst_rect);
3768             break;
3769
3770         default:
3771             status = VA_STATUS_ERROR_UNIMPLEMENTED;
3772             break;
3773         }
3774         
3775         _i965UnlockMutex(&i965->pp_mutex);
3776     }
3777
3778     return status;
3779 }       
3780
3781 static void
3782 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
3783 {
3784     int i;
3785
3786     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3787     pp_context->surface_state_binding_table.bo = NULL;
3788
3789     dri_bo_unreference(pp_context->curbe.bo);
3790     pp_context->curbe.bo = NULL;
3791
3792     dri_bo_unreference(pp_context->sampler_state_table.bo);
3793     pp_context->sampler_state_table.bo = NULL;
3794
3795     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3796     pp_context->sampler_state_table.bo_8x8 = NULL;
3797
3798     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3799     pp_context->sampler_state_table.bo_8x8_uv = NULL;
3800
3801     dri_bo_unreference(pp_context->idrt.bo);
3802     pp_context->idrt.bo = NULL;
3803     pp_context->idrt.num_interface_descriptors = 0;
3804
3805     dri_bo_unreference(pp_context->vfe_state.bo);
3806     pp_context->vfe_state.bo = NULL;
3807
3808     dri_bo_unreference(pp_context->stmm.bo);
3809     pp_context->stmm.bo = NULL;
3810
3811     for (i = 0; i < NUM_PP_MODULES; i++) {
3812         struct pp_module *pp_module = &pp_context->pp_modules[i];
3813
3814         dri_bo_unreference(pp_module->kernel.bo);
3815         pp_module->kernel.bo = NULL;
3816     }
3817
3818     free(pp_context->pp_static_parameter);
3819     free(pp_context->pp_inline_parameter);
3820     pp_context->pp_static_parameter = NULL;
3821     pp_context->pp_inline_parameter = NULL;
3822 }
3823
3824 Bool
3825 i965_post_processing_terminate(VADriverContextP ctx)
3826 {
3827     struct i965_driver_data *i965 = i965_driver_data(ctx);
3828     struct i965_post_processing_context *pp_context = i965->pp_context;
3829
3830     if (pp_context) {
3831         i965_post_processing_context_finalize(pp_context);
3832         free(pp_context);
3833     }
3834
3835     i965->pp_context = NULL;
3836
3837     return True;
3838 }
3839
3840 static void
3841 i965_post_processing_context_init(VADriverContextP ctx,
3842                                   struct i965_post_processing_context *pp_context,
3843                                   struct intel_batchbuffer *batch)
3844 {
3845     struct i965_driver_data *i965 = i965_driver_data(ctx);
3846     int i;
3847
3848     pp_context->urb.size = URB_SIZE((&i965->intel));
3849     pp_context->urb.num_vfe_entries = 32;
3850     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
3851     pp_context->urb.num_cs_entries = 1;
3852     
3853     if (IS_GEN7(i965->intel.device_id))
3854         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
3855     else
3856         pp_context->urb.size_cs_entry = 2;
3857
3858     pp_context->urb.vfe_start = 0;
3859     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
3860         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
3861     assert(pp_context->urb.cs_start + 
3862            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
3863
3864     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
3865     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
3866     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
3867
3868     if (IS_GEN7(i965->intel.device_id))
3869         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
3870     else if (IS_GEN6(i965->intel.device_id))
3871         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
3872     else if (IS_IRONLAKE(i965->intel.device_id))
3873         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
3874
3875     for (i = 0; i < NUM_PP_MODULES; i++) {
3876         struct pp_module *pp_module = &pp_context->pp_modules[i];
3877         dri_bo_unreference(pp_module->kernel.bo);
3878         if (pp_module->kernel.bin && pp_module->kernel.size) {
3879             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
3880                                                 pp_module->kernel.name,
3881                                                 pp_module->kernel.size,
3882                                                 4096);
3883             assert(pp_module->kernel.bo);
3884             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
3885         } else {
3886             pp_module->kernel.bo = NULL;
3887         }
3888     }
3889
3890     /* static & inline parameters */
3891     if (IS_GEN7(i965->intel.device_id)) {
3892         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
3893         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
3894     } else {
3895         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
3896         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
3897     }
3898
3899     pp_context->batch = batch;
3900 }
3901
3902 Bool
3903 i965_post_processing_init(VADriverContextP ctx)
3904 {
3905     struct i965_driver_data *i965 = i965_driver_data(ctx);
3906     struct i965_post_processing_context *pp_context = i965->pp_context;
3907
3908     if (HAS_PP(i965)) {
3909         if (pp_context == NULL) {
3910             pp_context = calloc(1, sizeof(*pp_context));
3911             i965_post_processing_context_init(ctx, pp_context, i965->batch);
3912             i965->pp_context = pp_context;
3913         }
3914     }
3915
3916     return True;
3917 }
3918
3919 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
3920     PP_NULL,    /* VAProcFilterNone */
3921     PP_NV12_DN, /* VAProcFilterNoiseReduction */
3922     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
3923     PP_NULL,    /* VAProcFilterSharpening */
3924     PP_NULL,    /* VAProcFilterColorBalance */
3925     PP_NULL,    /* VAProcFilterColorStandard */
3926 };
3927
3928 static const int proc_frame_to_pp_frame[3] = {
3929     I965_SURFACE_FLAG_FRAME,
3930     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
3931     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
3932 };
3933
3934 static void 
3935 i965_proc_picture(VADriverContextP ctx, 
3936                   VAProfile profile, 
3937                   union codec_state *codec_state,
3938                   struct hw_context *hw_context)
3939 {
3940     struct i965_driver_data *i965 = i965_driver_data(ctx);
3941     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
3942     struct proc_state *proc_state = &codec_state->proc;
3943     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
3944     struct object_surface *obj_surface;
3945     struct i965_surface src_surface, dst_surface;
3946     VARectangle src_rect, dst_rect;
3947     VAStatus status;
3948     int i;
3949     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
3950     int num_tmp_surfaces = 0;
3951     unsigned int tiling = 0, swizzle = 0;
3952     int in_width, in_height;
3953
3954     assert(pipeline_param->surface != VA_INVALID_ID);
3955     assert(proc_state->current_render_target != VA_INVALID_ID);
3956
3957     obj_surface = SURFACE(pipeline_param->surface);
3958     in_width = obj_surface->orig_width;
3959     in_height = obj_surface->orig_height;
3960     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
3961
3962     src_surface.id = pipeline_param->surface;
3963     src_surface.type = I965_SURFACE_TYPE_SURFACE;
3964     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
3965
3966     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
3967         VASurfaceID out_surface_id = VA_INVALID_ID;
3968
3969         src_surface.id = pipeline_param->surface;
3970         src_surface.type = I965_SURFACE_TYPE_SURFACE;
3971         src_surface.flags = I965_SURFACE_FLAG_FRAME;
3972         src_rect.x = 0;
3973         src_rect.y = 0;
3974         src_rect.width = in_width;
3975         src_rect.height = in_height;
3976
3977         status = i965_CreateSurfaces(ctx,
3978                                      in_width,
3979                                      in_height,
3980                                      VA_RT_FORMAT_YUV420,
3981                                      1,
3982                                      &out_surface_id);
3983         assert(status == VA_STATUS_SUCCESS);
3984         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
3985         obj_surface = SURFACE(out_surface_id);
3986         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
3987
3988         dst_surface.id = out_surface_id;
3989         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3990         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3991         dst_rect.x = 0;
3992         dst_rect.y = 0;
3993         dst_rect.width = in_width;
3994         dst_rect.height = in_height;
3995
3996         status = i965_image_processing(ctx,
3997                                        &src_surface,
3998                                        &src_rect,
3999                                        &dst_surface,
4000                                        &dst_rect);
4001         assert(status == VA_STATUS_SUCCESS);
4002
4003         src_surface.id = out_surface_id;
4004         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4005         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4006     }
4007
4008     if (pipeline_param->surface_region) {
4009         src_rect.x = pipeline_param->surface_region->x;
4010         src_rect.y = pipeline_param->surface_region->y;
4011         src_rect.width = pipeline_param->surface_region->width;
4012         src_rect.height = pipeline_param->surface_region->height;
4013     } else {
4014         src_rect.x = 0;
4015         src_rect.y = 0;
4016         src_rect.width = in_width;
4017         src_rect.height = in_height;
4018     }
4019
4020     if (pipeline_param->output_region) {
4021         dst_rect.x = pipeline_param->output_region->x;
4022         dst_rect.y = pipeline_param->output_region->y;
4023         dst_rect.width = pipeline_param->output_region->width;
4024         dst_rect.height = pipeline_param->output_region->height;
4025     } else {
4026         dst_rect.x = 0;
4027         dst_rect.y = 0;
4028         dst_rect.width = in_width;
4029         dst_rect.height = in_height;
4030     }
4031
4032     obj_surface = SURFACE(proc_state->current_render_target);
4033     i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4034     
4035     for (i = 0; i < pipeline_param->num_filters; i++) {
4036         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
4037         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
4038         VAProcFilterType filter_type = filter_param->type;
4039         VASurfaceID out_surface_id = VA_INVALID_ID;
4040
4041         if (procfilter_to_pp_flag[filter_type] != PP_NULL) {
4042             status = i965_CreateSurfaces(ctx,
4043                                          in_width,
4044                                          in_height,
4045                                          VA_RT_FORMAT_YUV420,
4046                                          1,
4047                                          &out_surface_id);
4048             assert(status == VA_STATUS_SUCCESS);
4049             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4050             obj_surface = SURFACE(out_surface_id);
4051             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4052             dst_surface.id = out_surface_id;
4053             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4054             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
4055                                                    &src_surface,
4056                                                    &src_rect,
4057                                                    &dst_surface,
4058                                                    &src_rect,
4059                                                    procfilter_to_pp_flag[filter_type],
4060                                                    filter_param);
4061
4062             if (status == VA_STATUS_SUCCESS) {
4063                 src_surface.id = dst_surface.id;
4064                 src_surface.type = dst_surface.type;
4065                 src_surface.flags = dst_surface.flags;
4066             }
4067         }
4068     }
4069
4070     dst_surface.id = proc_state->current_render_target;
4071     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4072
4073     if (src_rect.width == dst_rect.width &&
4074         src_rect.height == dst_rect.height) {
4075         i965_post_processing_internal(ctx, &proc_context->pp_context,
4076                                       &src_surface,
4077                                       &src_rect,
4078                                       &dst_surface,
4079                                       &dst_rect,
4080                                       PP_NV12_LOAD_SAVE_N12,
4081                                       NULL);
4082     } else {
4083
4084         i965_post_processing_internal(ctx, &proc_context->pp_context,
4085                                       &src_surface,
4086                                       &src_rect,
4087                                       &dst_surface,
4088                                       &dst_rect,
4089                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
4090                                       PP_NV12_AVS : PP_NV12_SCALING,
4091                                       NULL);
4092     }
4093
4094     if (num_tmp_surfaces)
4095         i965_DestroySurfaces(ctx,
4096                              tmp_surfaces,
4097                              num_tmp_surfaces);
4098
4099     intel_batchbuffer_flush(hw_context->batch);
4100 }
4101
4102 static void
4103 i965_proc_context_destroy(void *hw_context)
4104 {
4105     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4106
4107     i965_post_processing_context_finalize(&proc_context->pp_context);
4108     intel_batchbuffer_free(proc_context->base.batch);
4109     free(proc_context);
4110 }
4111
4112 struct hw_context *
4113 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
4114 {
4115     struct intel_driver_data *intel = intel_driver_data(ctx);
4116     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
4117
4118     proc_context->base.destroy = i965_proc_context_destroy;
4119     proc_context->base.run = i965_proc_picture;
4120     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
4121     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
4122
4123     return (struct hw_context *)proc_context;
4124 }